From b5cb1921a90f7b0f08b7c47119f1b7524e2b6edd Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Sun, 10 Jun 2018 18:44:42 +0900 Subject: [PATCH v18 2/4] Support atomic commit among multiple foreign servers. --- doc/src/sgml/catalogs.sgml | 97 + doc/src/sgml/config.sgml | 124 ++ doc/src/sgml/fdwhandler.sgml | 200 ++ doc/src/sgml/func.sgml | 51 + doc/src/sgml/monitoring.sgml | 56 + src/backend/access/rmgrdesc/Makefile | 8 +- src/backend/access/rmgrdesc/fdwxactdesc.c | 65 + src/backend/access/rmgrdesc/xlogdesc.c | 6 +- src/backend/access/transam/Makefile | 6 +- src/backend/access/transam/rmgr.c | 1 + src/backend/access/transam/twophase.c | 42 + src/backend/access/transam/xact.c | 26 +- src/backend/access/transam/xlog.c | 32 +- src/backend/catalog/system_views.sql | 11 + src/backend/commands/foreigncmds.c | 23 + src/backend/executor/execPartition.c | 4 + src/backend/executor/nodeForeignscan.c | 8 + src/backend/executor/nodeModifyTable.c | 5 + src/backend/foreign/Makefile | 2 +- src/backend/foreign/fdwxact.c | 2762 +++++++++++++++++++++++++ src/backend/foreign/fdwxact_launcher.c | 587 ++++++ src/backend/foreign/fdwxact_resolver.c | 310 +++ src/backend/foreign/foreign.c | 43 + src/backend/postmaster/bgworker.c | 8 + src/backend/postmaster/pgstat.c | 18 + src/backend/postmaster/postmaster.c | 15 +- src/backend/replication/logical/decode.c | 1 + src/backend/storage/ipc/ipci.c | 5 + src/backend/storage/ipc/procarray.c | 46 + src/backend/storage/lmgr/lwlocknames.txt | 2 + src/backend/storage/lmgr/proc.c | 8 + src/backend/tcop/postgres.c | 14 + src/backend/utils/misc/guc.c | 61 + src/backend/utils/misc/postgresql.conf.sample | 16 + src/backend/utils/probes.d | 2 + src/bin/initdb/initdb.c | 1 + src/bin/pg_controldata/pg_controldata.c | 2 + src/bin/pg_resetwal/pg_resetwal.c | 2 + src/bin/pg_waldump/rmgrdesc.c | 1 + src/include/access/rmgrlist.h | 1 + src/include/access/twophase.h | 1 + src/include/access/xact.h | 7 + src/include/access/xlog_internal.h | 1 + 
src/include/catalog/pg_control.h | 1 + src/include/catalog/pg_proc.dat | 23 + src/include/foreign/fdwapi.h | 18 +- src/include/foreign/fdwxact.h | 147 ++ src/include/foreign/fdwxact_launcher.h | 31 + src/include/foreign/fdwxact_resolver.h | 23 + src/include/foreign/fdwxact_xlog.h | 51 + src/include/foreign/foreign.h | 2 +- src/include/foreign/resolver_internal.h | 65 + src/include/pgstat.h | 8 +- src/include/storage/proc.h | 10 + src/include/storage/procarray.h | 5 + src/include/utils/guc_tables.h | 2 + src/test/regress/expected/rules.out | 12 + 57 files changed, 5052 insertions(+), 27 deletions(-) create mode 100644 src/backend/access/rmgrdesc/fdwxactdesc.c create mode 100755 src/backend/foreign/fdwxact.c create mode 100644 src/backend/foreign/fdwxact_launcher.c create mode 100644 src/backend/foreign/fdwxact_resolver.c create mode 100644 src/include/foreign/fdwxact.h create mode 100644 src/include/foreign/fdwxact_launcher.h create mode 100644 src/include/foreign/fdwxact_resolver.h create mode 100644 src/include/foreign/fdwxact_xlog.h create mode 100644 src/include/foreign/resolver_internal.h diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 0179dee..792f361 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -9622,6 +9622,103 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx + + <structname>pg_prepared_fdw_xacts</structname> + + + pg_prepared_fdw_xacts + + + + The view pg_prepared_fdw_xacts displays + information about foreign transactions that are currently prepared on + foreign servers for atomic distributed transaction commit (see + for details). + + + + pg_prepared_fdw_xacts contains one row per prepared + foreign transaction. An entry is removed when the foreign transaction is + committed or rolled back. 
+ + + + <structname>pg_prepared_fdw_xacts</structname> Columns + + + + + Name + Type + References + Description + + + + + dbid + oid + pg_database.oid + + OID of the database in which the foreign transaction resides + + + + transaction + xid + + + ID of the local transaction that this foreign transaction is associated with + + + + serverid + oid + pg_foreign_server.oid + + The OID of the foreign server on which this foreign transaction is prepared + + + + userid + oid + pg_authid.oid + + The OID of the user that prepared this foreign transaction. + + + + status + text + + + Status of foreign transaction: prepared, committing, aborting or unknown + + + + identifier + text + + + The identifier of the prepared foreign transaction. + + + + +
+ + + When the pg_prepared_fdw_xacts view is accessed, the + internal transaction manager data structures are momentarily locked, and + a copy is made for the view to display. This ensures that the + view produces a consistent set of results, while not blocking + normal operations longer than necessary. Nonetheless + there could be some impact on database performance if this view is + frequently accessed. + + +
+ <structname>pg_publication_tables</structname> diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index f11b8f7..406fd9c 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1546,6 +1546,29 @@ include_dir 'conf.d' + + max_prepared_foreign_transactions (integer) + + max_prepared_foreign_transactions configuration parameter + + + + + Sets the maximum number of foreign transactions that can be prepared + simultaneously. A single local transaction can give rise to multiple + foreign transactions. If N local transactions each + span K foreign servers, this value needs to be set to + N * K, not just N. + This parameter can only be set at server start. + + + When running a standby server, you must set this parameter to the + same or higher value than on the master server. Otherwise, queries + will not be allowed in the standby server. + + + + work_mem (integer) @@ -3611,6 +3634,78 @@ ANY num_sync ( + Foreign Transaction Resolvers + + + These settings control the behavior of a foreign transaction resolver. + + + + + + max_foreign_transaction_resolvers (integer) + + max_foreign_transaction_resolvers configuration parameter + + + + + Specifies the maximum number of foreign transaction resolution workers. A foreign transaction + resolver is responsible for foreign transaction resolution on one database. + + + Foreign transaction resolution workers are taken from the pool defined by + max_worker_processes. + + + The default value is 0. + + + + + + foreign_transaction_resolution_retry_interval (integer) + + foreign_transaction_resolution_retry_interval configuration parameter + + + + + Specifies how long the foreign transaction resolver should wait after the last resolution + failed before retrying to resolve foreign transactions. This parameter can only be set in the + postgresql.conf file or on the server command line. + + + The default value is 10 seconds. 
+ + + + + + foreign_transaction_resolver_timeout (integer) + + foreign_transaction_resolver_timeout configuration parameter + + + + + Terminate foreign transaction resolver processes that have had no foreign + transactions to resolve for longer than the specified number of milliseconds. + A value of zero disables the timeout mechanism. You should set this value to + zero only if you set max_foreign_transaction_resolvers at + least as high as the number of databases you have. This parameter can only be set in the + postgresql.conf file or on the server command line. + + + The default value is 60 seconds. + + + + + + + @@ -7826,6 +7921,35 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' + + Foreign Transaction Management + + + + + foreign_twophase_commit (boolean) + + foreign_twophase_commit configuration parameter + + + + + Specifies whether transaction commit will wait for all involved foreign transactions + to be resolved before the command returns a "success" indication to the client. + Both max_prepared_foreign_transactions and + max_foreign_transaction_resolvers must be set to non-zero values to + allow foreign twophase commit to be used. + + + This parameter can be changed at any time; the behavior for any one transaction + is determined by the setting in effect when it commits. + + + + + + + Version and Platform Compatibility diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml index 4ce88dd..24c635c 100644 --- a/doc/src/sgml/fdwhandler.sgml +++ b/doc/src/sgml/fdwhandler.sgml @@ -1390,6 +1390,109 @@ ReparameterizeForeignPathByChild(PlannerInfo *root, List *fdw_private, + + FDW Routines For Transaction Management + + + If an FDW wishes to support atomic commit + (as described in ), it must call the + registration function FdwXactRegisterForeignTransaction + and provide the following callback functions: + + + + +bool +PrepareForeignTransaction(ForeignTransaction *foreign_xact); + + Prepare a foreign transaction identified by foreign_xact. 
+ This function is called at the pre-commit phase of the local + transaction if atomic commit is + required. Returning true means that preparing + the foreign transaction succeeded. + + + +bool +CommitForeignTransaction(ForeignTransaction *foreign_xact); + + Commit a not-prepared foreign transaction identified by + foreign_xact. + This function is called at the pre-commit phase of the local + transaction if atomic commit is not required. Atomic + commit is not required either when data was modified on + only one server, including the local server, or when the user doesn't + request atomic commit by . + Returning true means that committing the + foreign transaction succeeded. + + + +bool +RollbackForeignTransaction(ForeignTransaction *foreign_xact); + + Rollback a not-prepared foreign transaction identified by + foreign_xact. + This function is called at the end of the local transaction after + it has been rolled back locally, either when the user requested rollback or when + any error occurred within the transaction. This function could + be called recursively if any error occurs while rolling back the + foreign transaction for whatever reason. You need to track + recursion and prevent this function from being called infinitely. + Returning true means that rolling back the + foreign transaction succeeded. + + + +bool +ResolvePreparedForeignTransaction(ForeignTransaction *foreign_xact, + bool is_commit); + + Commit or rollback the prepared foreign transaction identified + by foreign_xact on a connection to the foreign server. + When is_commit is true, it indicates that the foreign + transaction should be committed. + This function is normally called by the foreign transaction resolver + process but can also be called by the pg_resolve_fdw_xact + function. In the resolver process, this function is called either + when a backend requests the resolver process to resolve a distributed + transaction after it has been prepared or when a database has dangling + transactions. 
Returning true means that resolving + the foreign transaction succeeded. + In the abort case, please note that the prepared foreign transaction + having identifier foreign_xact->fx_id might not + exist on the foreign server. If you failed to resolve the foreign + transaction due to an undefined object error + (ERRCODE_UNDEFINED_OBJECT) you should regard + it as success and return true. + + + +bool +IsTwoPhaseCommitEnabled(Oid serverid); + + Return true if the foreign server identified by + serverid is capable of the two-phase commit protocol. + This function is called when the transaction begins to modify data on + the foreign server. Returning false indicates that + the current transaction cannot use atomic commit even if atomic commit + is requested by the user. + + + + + Functions PrepareForeignTransaction, + CommitForeignTransaction and + RollbackForeignTransaction are called + outside of a valid transaction state. So please note that + you cannot use functions that use the system catalog cache + such as Foreign Data Wrapper helper functions described in + . To get information about FDW-related + objects, you can use the given ForeignTransaction + instead (see foreign/fdwxact.h for details). + + + @@ -1835,4 +1938,101 @@ GetForeignServerByName(const char *name, bool missing_ok); + + Transaction management for Foreign Data Wrappers + + + Atomic commit among multiple foreign servers + + + PostgreSQL foreign transaction manager + allows FDWs to read and write data on foreign servers within a transaction while + maintaining atomicity of the foreign data (aka atomic commit). Using + atomic commit, it guarantees that a distributed transaction is committed + or rolled back on all participating foreign + servers. To achieve atomic commit, PostgreSQL + employs the two-phase commit protocol, which is a type of atomic commitment + protocol. 
Every FDW that wishes to support atomic commit + is required to support transaction management callback routines + (see for details) + and register the foreign transaction using + FdwXactRegisterForeignTransaction when starting a + transaction on the foreign server. Transactions on registered foreign servers + are managed by the foreign transaction manager. + +void +FdwXactRegisterForeignTransaction(Oid serverid, Oid userid, char *fx_id) + + This function should be called when a transaction starts on the foreign server. + serverid and userid are OIDs + which specify on which server and by which user the transaction is started. fx_id + is a null-terminated string which is the identifier of the foreign transaction and it + will be passed when transaction management APIs are called. The length of + fx_id must be less than 200 bytes. Also this identifier + must be unique enough so that it doesn't conflict with other concurrent foreign + transactions. fx_id can be NULL. + If it's NULL, a transaction identifier is automatically + generated in the form of + fx_<random value up to 2^31>_<server oid>_<user oid>. + Since this identifier is used per foreign transaction and the xid of an unresolved + distributed transaction is never reused, an auto-generated identifier is + sufficient to ensure uniqueness. It's recommended to generate the foreign transaction + identifier in the FDW if the format of the auto-generated identifier doesn't match + the requirements of the foreign server. + + + + An example of such a transaction is as follows + +BEGIN; +UPDATE ft1 SET col = 'a'; +UPDATE ft2 SET col = 'b'; +COMMIT; + + ft1 and ft2 are foreign tables on different foreign servers, which may be using different + Foreign Data Wrappers. + + + + When a transaction starts on the foreign server, an FDW that wishes to use atomic + commit must register the foreign transaction as a participant by calling + FdwXactRegisterForeignTransaction. 
Also during + transaction, IsTwoPhaseCommitEnabled is called whenever + the transaction begins to modify data on the foreign server. If the FDW wishes to use + atomic commit, IsTwoPhaseCommitEnabled must return + true. All foreign transaction participants must + return true to achieve atomic commit. + + + + During the pre-commit phase of the local transaction, the foreign transaction manager + persists the foreign transaction information to disk and WAL, and then + prepares all foreign transactions by calling PrepareForeignTransaction + if the two-phase commit protocol is required. Two-phase commit is required only if + the transaction modified data on more than one server, including the local + server, and the user requests atomic commit. PostgreSQL + can commit locally and go to the next step if and only if all foreign + transactions were prepared successfully. If two-phase commit is not required, the foreign + transaction manager commits the transaction on the foreign server by calling + CommitForeignTransaction and then + PostgreSQL commits locally. The foreign transaction + manager doesn't make any further changes to foreign transactions from this point + forward. If any failure happens for whatever reason, for example a network + failure or a user request, before PostgreSQL commits + locally, the foreign transaction manager changes over to rollback and calls + RollbackForeignTransaction for every foreign server to + close the current transaction on the foreign servers. + + + + When two-phase commit is required, after committing locally, the transaction + commit will wait for all prepared foreign transactions to be resolved before + the commit completes. The foreign transaction resolver is responsible for + foreign transaction resolution. ResolvePreparedForeignTransaction + is called by the foreign transaction resolver process when it resolves a foreign + transaction. ResolvePreparedForeignTransaction is also called + when a user executes the pg_resolve_fdw_xact function. 
+ + + diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 9a7f683..8ed007c 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -20755,6 +20755,57 @@ SELECT (pg_stat_file('filename')).modification; + + Foreign Transaction Management Functions + + + pg_resolve_fdw_xact + + + pg_remove_fdw_xact + + + + shows the functions + available for foreign transaction management. + These functions cannot be executed during recovery. Use of these functions + is restricted to superusers. + + + + Foreign Transaction Management Functions + + + Name Return Type Description + + + + + + pg_resolve_fdw_xact(transaction xid, serverid oid, userid oid) + + bool + + Resolve a foreign transaction. This function searches for foreign transactions + matching the arguments and resolves them. This function won't resolve + a foreign transaction which is in progress, or one that is locked by some + other backend. + + + + + pg_remove_fdw_xact(transaction xid, serverid oid, userid oid) + + void + + This function works the same as pg_resolve_fdw_xact + except that it removes the foreign transaction entry without resolving it. + + + + +
+
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 0484cfa..635a5e7 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -332,6 +332,14 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser + + pg_stat_fdw_xact_resolverpg_stat_fdw_xact_resolver + One row per foreign transaction resolver process, showing statistics about + foreign transaction resolution. See for + details. + + + @@ -1194,6 +1202,14 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser Waiting in main loop of checkpointer process. + FdwXactLauncherMain + Waiting in main loop of foreign transaction resolution launcher process. + + + FdwXactResolverMain + Waiting in main loop of foreign transaction resolution worker process. + + LogicalLauncherMain Waiting in main loop of logical launcher process. @@ -1405,6 +1421,10 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser Waiting for confirmation from remote server during synchronous replication. + FdwXactResolution + Waiting for all foreign transaction participants to be resolved during atomic commit among foreign servers. + + Timeout BaseBackupThrottle Waiting during base backup when throttling activity. @@ -2214,6 +2234,42 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i connection. + + <structname>pg_stat_fdw_xact_resolver</structname> View + + + + Column + Type + Description + + + + + + pid + integer + Process ID of a foreign transaction resolver process + + + dbid + oid + OID of the database to which the foreign transaction resolver is connected + + + last_resolved_time + timestamp with time zone + Time at which the process last resolved a foreign transaction + + + +
+ + + The pg_stat_fdw_xact_resolver view will contain one + row per foreign transaction resolver process, showing state of resolution + of foreign transactions. + <structname>pg_stat_archiver</structname> View diff --git a/src/backend/access/rmgrdesc/Makefile b/src/backend/access/rmgrdesc/Makefile index 5514db1..742e825 100644 --- a/src/backend/access/rmgrdesc/Makefile +++ b/src/backend/access/rmgrdesc/Makefile @@ -8,9 +8,9 @@ subdir = src/backend/access/rmgrdesc top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = brindesc.o clogdesc.o committsdesc.o dbasedesc.o genericdesc.o \ - gindesc.o gistdesc.o hashdesc.o heapdesc.o logicalmsgdesc.o \ - mxactdesc.o nbtdesc.o relmapdesc.o replorigindesc.o seqdesc.o \ - smgrdesc.o spgdesc.o standbydesc.o tblspcdesc.o xactdesc.o xlogdesc.o +OBJS = brindesc.o clogdesc.o committsdesc.o dbasedesc.o fdwxactdesc.o \ + genericdesc.o gindesc.o gistdesc.o hashdesc.o heapdesc.o \ + logicalmsgdesc.o mxactdesc.o nbtdesc.o relmapdesc.o replorigindesc.o \ + seqdesc.o smgrdesc.o spgdesc.o standbydesc.o tblspcdesc.o xactdesc.o xlogdesc.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/rmgrdesc/fdwxactdesc.c b/src/backend/access/rmgrdesc/fdwxactdesc.c new file mode 100644 index 0000000..3705104 --- /dev/null +++ b/src/backend/access/rmgrdesc/fdwxactdesc.c @@ -0,0 +1,65 @@ +/*------------------------------------------------------------------------- + * + * fdw_xactdesc.c + * PostgreSQL distributed transaction manager for foreign server. + * + * This module describes the WAL records for foreign transaction manager. 
+ * + * Portions Copyright (c) 2018, PostgreSQL Global Development Group + * + * src/backend/access/rmgrdesc/fdwxactdesc.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "foreign/fdwxact_xlog.h" + +/* Append a description of a foreign transaction WAL record to buf. */ +void +fdw_xact_desc(StringInfo buf, XLogReaderState *record) +{ + char *rec = XLogRecGetData(record); + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + + if (info == XLOG_FDW_XACT_INSERT) + { + FdwXactOnDiskData *fdw_insert_xlog = (FdwXactOnDiskData *) rec; + + appendStringInfo(buf, "Foreign server oid: %u", fdw_insert_xlog->serverid); + appendStringInfo(buf, " user oid: %u", fdw_insert_xlog->userid); + appendStringInfo(buf, " database id: %u", fdw_insert_xlog->dbid); + appendStringInfo(buf, " local xid: %u", fdw_insert_xlog->local_xid); + /* + * TODO: This should be really interpreted by each FDW; we also need to + * assess whether we want to add this information. + */ + appendStringInfo(buf, " foreign transaction info: %s", + fdw_insert_xlog->fdw_xact_id); + } + else if (info == XLOG_FDW_XACT_REMOVE) /* never decode an unknown record type as a removal */ + { + xl_fdw_xact_remove *fdw_remove_xlog = (xl_fdw_xact_remove *) rec; + + appendStringInfo(buf, "Foreign server oid: %u", fdw_remove_xlog->serverid); + appendStringInfo(buf, " user oid: %u", fdw_remove_xlog->userid); + appendStringInfo(buf, " database id: %u", fdw_remove_xlog->dbid); + appendStringInfo(buf, " local xid: %u", fdw_remove_xlog->xid); + } + +} + +/* Return the name of a foreign transaction WAL record type, or NULL if unknown. */ +const char * +fdw_xact_identify(uint8 info) +{ + switch (info & ~XLR_INFO_MASK) + { + case XLOG_FDW_XACT_INSERT: + return "NEW FOREIGN TRANSACTION"; + case XLOG_FDW_XACT_REMOVE: + return "REMOVE FOREIGN TRANSACTION"; + } + /* Keep compiler happy */ + return NULL; +} diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index 00741c7..023a7c5 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -112,14 +112,16 @@ xlog_desc(StringInfo buf, XLogReaderState *record) 
appendStringInfo(buf, "max_connections=%d max_worker_processes=%d " "max_prepared_xacts=%d max_locks_per_xact=%d " "wal_level=%s wal_log_hints=%s " - "track_commit_timestamp=%s", + "track_commit_timestamp=%s " + "max_prepared_foreign_xacts=%d", xlrec.MaxConnections, xlrec.max_worker_processes, xlrec.max_prepared_xacts, xlrec.max_locks_per_xact, wal_level_str, xlrec.wal_log_hints ? "on" : "off", - xlrec.track_commit_timestamp ? "on" : "off"); + xlrec.track_commit_timestamp ? "on" : "off", + xlrec.max_prepared_foreign_xacts); } else if (info == XLOG_FPW_CHANGE) { diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile index 16fbe47..f15c83a 100644 --- a/src/backend/access/transam/Makefile +++ b/src/backend/access/transam/Makefile @@ -12,9 +12,9 @@ subdir = src/backend/access/transam top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = clog.o commit_ts.o generic_xlog.o multixact.o parallel.o rmgr.o slru.o \ - subtrans.o timeline.o transam.o twophase.o twophase_rmgr.o varsup.o \ - xact.o xlog.o xlogarchive.o xlogfuncs.o \ +OBJS = clog.o commit_ts.o generic_xlog.o multixact.o \ + parallel.o rmgr.o slru.o subtrans.o timeline.o transam.o twophase.o \ + twophase_rmgr.o varsup.o xact.o xlog.o xlogarchive.o xlogfuncs.o \ xloginsert.o xlogreader.o xlogutils.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/transam/rmgr.c b/src/backend/access/transam/rmgr.c index 9368b56..b5c3502 100644 --- a/src/backend/access/transam/rmgr.c +++ b/src/backend/access/transam/rmgr.c @@ -24,6 +24,7 @@ #include "commands/dbcommands_xlog.h" #include "commands/sequence.h" #include "commands/tablespace.h" +#include "foreign/fdwxact.h" #include "replication/message.h" #include "replication/origin.h" #include "storage/standby.h" diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 3942734..839e768 100644 --- a/src/backend/access/transam/twophase.c +++ 
b/src/backend/access/transam/twophase.c @@ -89,6 +89,7 @@ #include "access/xlogreader.h" #include "catalog/pg_type.h" #include "catalog/storage.h" +#include "foreign/fdwxact.h" #include "funcapi.h" #include "miscadmin.h" #include "pg_trace.h" @@ -844,6 +845,35 @@ TwoPhaseGetGXact(TransactionId xid) } /* + * TwoPhaseExists + * Return true if TwoPhaseState holds a prepared transaction whose XID equals xid + */ +bool +TwoPhaseExists(TransactionId xid) +{ + int i; + bool found = false; + + LWLockAcquire(TwoPhaseStateLock, LW_SHARED); /* shared mode: the array is only read here */ + + for (i = 0; i < TwoPhaseState->numPrepXacts; i++) + { + GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + PGXACT *pgxact = &ProcGlobal->allPgXact[gxact->pgprocno]; /* PGXACT slot of this entry */ + + if (pgxact->xid == xid) + { + found = true; + break; /* the first match is enough */ + } + } + + LWLockRelease(TwoPhaseStateLock); + + return found; +} + +/* * TwoPhaseGetDummyProc * Get the dummy backend ID for prepared transaction specified by XID * @@ -2316,6 +2346,12 @@ RecordTransactionCommitPrepared(TransactionId xid, * in the procarray and continue to hold locks. */ SyncRepWaitForLSN(recptr, true); + + /* + * Wait for foreign transaction prepared as part of this prepared + * transaction to be committed. + */ + FdwXactWaitToBeResolved(xid, true); } /* @@ -2375,6 +2411,12 @@ RecordTransactionAbortPrepared(TransactionId xid, * in the procarray and continue to hold locks. */ SyncRepWaitForLSN(recptr, false); + + /* + * Wait for foreign transaction prepared as part of this prepared + * transaction to be committed. 
+ */ + FdwXactWaitToBeResolved(xid, false); } /* diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 875be18..c4c879d 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -36,6 +36,7 @@ #include "commands/tablecmds.h" #include "commands/trigger.h" #include "executor/spi.h" +#include "foreign/fdwxact.h" #include "libpq/be-fsstubs.h" #include "libpq/pqsignal.h" #include "miscadmin.h" @@ -1108,6 +1109,7 @@ RecordTransactionCommit(void) SharedInvalidationMessage *invalMessages = NULL; bool RelcacheInitFileInval = false; bool wrote_xlog; + bool need_twophase_for_ac; /* Get data needed for commit record */ nrels = smgrGetPendingDeletes(true, &rels); @@ -1116,6 +1118,7 @@ RecordTransactionCommit(void) nmsgs = xactGetCommittedInvalidationMessages(&invalMessages, &RelcacheInitFileInval); wrote_xlog = (XactLastRecEnd != 0); + need_twophase_for_ac = ForeignTwophaseCommitRequired(); /* * If we haven't been assigned an XID yet, we neither can, nor do we want @@ -1154,12 +1157,13 @@ RecordTransactionCommit(void) } /* - * If we didn't create XLOG entries, we're done here; otherwise we - * should trigger flushing those entries the same as a commit record + * If we didn't create XLOG entries and the transaction does not need + * to be committed using two-phase commit. we're done here; otherwise + * we should trigger flushing those entries the same as a commit record * would. This will primarily happen for HOT pruning and the like; we * want these to be flushed to disk in due time. */ - if (!wrote_xlog) + if (!wrote_xlog && !need_twophase_for_ac) goto cleanup; } else @@ -1317,6 +1321,14 @@ RecordTransactionCommit(void) if (wrote_xlog && markXidCommitted) SyncRepWaitForLSN(XactLastRecEnd, true); + /* + * Wait for prepared foreign transaction to be resolved, if required. + * We only want to wait if we prepared foreign transaction in this + * transaction. 
+ */ + if (need_twophase_for_ac && markXidCommitted) + FdwXactWaitToBeResolved(xid, true); + /* remember end of last commit record */ XactLastCommitEnd = XactLastRecEnd; @@ -1955,6 +1967,9 @@ CommitTransaction(void) break; } + /* Pre-commit step for foreign transactions */ + PreCommit_FdwXacts(); + CallXactCallbacks(is_parallel_worker ? XACT_EVENT_PARALLEL_PRE_COMMIT : XACT_EVENT_PRE_COMMIT); @@ -2110,6 +2125,7 @@ CommitTransaction(void) AtEOXact_PgStat(true); AtEOXact_Snapshot(true, false); AtEOXact_ApplyLauncher(true); + AtEOXact_FdwXacts(true); pgstat_report_xact_timestamp(0); CurrentResourceOwner = NULL; @@ -2197,6 +2213,8 @@ PrepareTransaction(void) * the transaction-abort path. */ + AtPrepare_FdwXacts(); + /* Shut down the deferred-trigger manager */ AfterTriggerEndXact(true); @@ -2385,6 +2403,7 @@ PrepareTransaction(void) AtEOXact_Files(true); AtEOXact_ComboCid(); AtEOXact_HashTables(true); + AtEOXact_FdwXacts(true); /* don't call AtEOXact_PgStat here; we fixed pgstat state above */ AtEOXact_Snapshot(true, true); pgstat_report_xact_timestamp(0); @@ -2589,6 +2608,7 @@ AbortTransaction(void) AtEOXact_HashTables(false); AtEOXact_PgStat(false); AtEOXact_ApplyLauncher(false); + AtEOXact_FdwXacts(false); pgstat_report_xact_timestamp(0); } diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 7375a78..2a168cd 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -40,6 +40,7 @@ #include "catalog/pg_control.h" #include "catalog/pg_database.h" #include "commands/tablespace.h" +#include "foreign/fdwxact.h" #include "miscadmin.h" #include "pgstat.h" #include "port/atomics.h" @@ -5267,6 +5268,7 @@ BootStrapXLOG(void) ControlFile->MaxConnections = MaxConnections; ControlFile->max_worker_processes = max_worker_processes; ControlFile->max_prepared_xacts = max_prepared_xacts; + ControlFile->max_prepared_foreign_xacts = max_prepared_foreign_xacts; ControlFile->max_locks_per_xact = max_locks_per_xact; 
ControlFile->wal_level = wal_level; ControlFile->wal_log_hints = wal_log_hints; @@ -6354,6 +6356,9 @@ CheckRequiredParameterValues(void) RecoveryRequiresIntParameter("max_prepared_transactions", max_prepared_xacts, ControlFile->max_prepared_xacts); + RecoveryRequiresIntParameter("max_prepared_foreign_transactions", + max_prepared_foreign_xacts, + ControlFile->max_prepared_foreign_xacts); RecoveryRequiresIntParameter("max_locks_per_transaction", max_locks_per_xact, ControlFile->max_locks_per_xact); @@ -6878,14 +6883,15 @@ StartupXLOG(void) restoreTimeLineHistoryFiles(ThisTimeLineID, recoveryTargetTLI); /* - * Before running in recovery, scan pg_twophase and fill in its status to - * be able to work on entries generated by redo. Doing a scan before - * taking any recovery action has the merit to discard any 2PC files that - * are newer than the first record to replay, saving from any conflicts at - * replay. This avoids as well any subsequent scans when doing recovery - * of the on-disk two-phase data. + * Before running in recovery, scan pg_twophase and pg_fdw_xact, and then + * fill in its status to be able to work on entries generated by redo. + * Doing a scan before taking any recovery action has the merit to discard + * any state files that are newer than the first record to replay, saving + * from any conflicts at replay. This avoids as well any subsequent scans + * when doing recovery of the on-disk two-phase or fdwxact data. */ restoreTwoPhaseData(); + restoreFdwXactData(); lastFullPageWrites = checkPoint.fullPageWrites; @@ -7077,7 +7083,10 @@ StartupXLOG(void) InitRecoveryTransactionEnvironment(); if (wasShutdown) + { oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids); + oldestActiveXID = PrescanFdwXacts(oldestActiveXID); + } else oldestActiveXID = checkPoint.oldestActiveXid; Assert(TransactionIdIsValid(oldestActiveXID)); @@ -7583,6 +7592,7 @@ StartupXLOG(void) * as potential problems are detected before any on-disk change is done. 
*/ oldestActiveXID = PrescanPreparedTransactions(NULL, NULL); + oldestActiveXID = PrescanFdwXacts(oldestActiveXID); /* * Consider whether we need to assign a new timeline ID. @@ -7901,6 +7911,9 @@ StartupXLOG(void) /* Reload shared-memory state for prepared transactions */ RecoverPreparedTransactions(); + /* Load all foreign transaction entries from disk to memory */ + RecoverFdwXacts(); + /* * Shutdown the recovery environment. This must occur after * RecoverPreparedTransactions(), see notes for lock_twophase_recover() @@ -9217,6 +9230,7 @@ CheckPointGuts(XLogRecPtr checkPointRedo, int flags) CheckPointReplicationOrigin(); /* We deliberately delay 2PC checkpointing as long as possible */ CheckPointTwoPhase(checkPointRedo); + CheckPointFdwXacts(checkPointRedo); } /* @@ -9650,7 +9664,8 @@ XLogReportParameters(void) max_worker_processes != ControlFile->max_worker_processes || max_prepared_xacts != ControlFile->max_prepared_xacts || max_locks_per_xact != ControlFile->max_locks_per_xact || - track_commit_timestamp != ControlFile->track_commit_timestamp) + track_commit_timestamp != ControlFile->track_commit_timestamp || + max_prepared_foreign_xacts != ControlFile->max_prepared_foreign_xacts) { /* * The change in number of backend slots doesn't need to be WAL-logged @@ -9682,6 +9697,7 @@ XLogReportParameters(void) ControlFile->MaxConnections = MaxConnections; ControlFile->max_worker_processes = max_worker_processes; ControlFile->max_prepared_xacts = max_prepared_xacts; + ControlFile->max_prepared_foreign_xacts = max_prepared_foreign_xacts; ControlFile->max_locks_per_xact = max_locks_per_xact; ControlFile->wal_level = wal_level; ControlFile->wal_log_hints = wal_log_hints; @@ -9887,6 +9903,7 @@ xlog_redo(XLogReaderState *record) RunningTransactionsData running; oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids); + oldestActiveXID = PrescanFdwXacts(oldestActiveXID); /* * Construct a RunningTransactions snapshot representing a shut @@ -10085,6 +10102,7 @@ 
xlog_redo(XLogReaderState *record) ControlFile->MaxConnections = xlrec.MaxConnections; ControlFile->max_worker_processes = xlrec.max_worker_processes; ControlFile->max_prepared_xacts = xlrec.max_prepared_xacts; + ControlFile->max_prepared_foreign_xacts = xlrec.max_prepared_foreign_xacts; ControlFile->max_locks_per_xact = xlrec.max_locks_per_xact; ControlFile->wal_level = xlrec.wal_level; ControlFile->wal_log_hints = xlrec.wal_log_hints; diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 7251552..5fa6065 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -291,6 +291,9 @@ CREATE VIEW pg_prepared_xacts AS CREATE VIEW pg_prepared_statements AS SELECT * FROM pg_prepared_statement() AS P; +CREATE VIEW pg_prepared_fdw_xacts AS + SELECT * FROM pg_prepared_fdw_xacts() AS F; + CREATE VIEW pg_seclabels AS SELECT l.objoid, l.classoid, l.objsubid, @@ -773,6 +776,14 @@ CREATE VIEW pg_stat_subscription AS LEFT JOIN pg_stat_get_subscription(NULL) st ON (st.subid = su.oid); +CREATE VIEW pg_stat_fdwxact_resolvers AS + SELECT + r.pid, + r.dbid, + r.last_resolved_time + FROM pg_stat_get_fdwxact_resolver() r + WHERE r.pid IS NOT NULL; + CREATE VIEW pg_stat_ssl AS SELECT S.pid, diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c index e5dd995..50c31ef 100644 --- a/src/backend/commands/foreigncmds.c +++ b/src/backend/commands/foreigncmds.c @@ -28,6 +28,7 @@ #include "catalog/pg_user_mapping.h" #include "commands/defrem.h" #include "foreign/fdwapi.h" +#include "foreign/fdwxact.h" #include "foreign/foreign.h" #include "miscadmin.h" #include "parser/parse_func.h" @@ -1093,6 +1094,18 @@ RemoveForeignServerById(Oid srvId) if (!HeapTupleIsValid(tp)) elog(ERROR, "cache lookup failed for foreign server %u", srvId); + /* + * If there is a foreign prepared transaction with this foreign server, + * dropping it might result in dangling prepared transaction. 
+ */ + if (fdw_xact_exists(InvalidTransactionId, MyDatabaseId, srvId, InvalidOid)) + { + Form_pg_foreign_server srvForm = (Form_pg_foreign_server) GETSTRUCT(tp); + ereport(WARNING, + (errmsg("server \"%s\" has unresolved prepared transactions on it", + NameStr(srvForm->srvname)))); + } + CatalogTupleDelete(rel, &tp->t_self); ReleaseSysCache(tp); @@ -1407,6 +1420,16 @@ RemoveUserMapping(DropUserMappingStmt *stmt) user_mapping_ddl_aclcheck(useId, srv->serverid, srv->servername); /* + * If there is a foreign prepared transaction with this user mapping, + * dropping it might result in dangling prepared transaction. + */ + if (fdw_xact_exists(InvalidTransactionId, MyDatabaseId, srv->serverid, + useId)) + ereport(WARNING, + (errmsg("server \"%s\" has unresolved prepared transaction for user \"%s\"", + srv->servername, MappingUserName(useId)))); + + /* * Do the deletion */ object.classId = UserMappingRelationId; diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index ec7a526..ea31749 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -19,6 +19,7 @@ #include "executor/execPartition.h" #include "executor/executor.h" #include "foreign/fdwapi.h" +#include "foreign/fdwxact.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -744,7 +745,10 @@ ExecInitRoutingInfo(ModifyTableState *mtstate, */ if (partRelInfo->ri_FdwRoutine != NULL && partRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL) + { partRelInfo->ri_FdwRoutine->BeginForeignInsert(mtstate, partRelInfo); + FdwXactMarkForeignTransactionModified(partRelInfo, 0); + } MemoryContextSwitchTo(oldContext); diff --git a/src/backend/executor/nodeForeignscan.c b/src/backend/executor/nodeForeignscan.c index a2a28b7..30a0b66 100644 --- a/src/backend/executor/nodeForeignscan.c +++ b/src/backend/executor/nodeForeignscan.c @@ -22,9 +22,11 @@ */ #include "postgres.h" +#include "access/xact.h" #include "executor/executor.h" 
#include "executor/nodeForeignscan.h" #include "foreign/fdwapi.h" +#include "foreign/fdwxact.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -224,7 +226,13 @@ ExecInitForeignScan(ForeignScan *node, EState *estate, int eflags) * Tell the FDW to initialize the scan. */ if (node->operation != CMD_SELECT) + { fdwroutine->BeginDirectModify(scanstate, eflags); + + /* Mark this transaction modified data on the foreign server */ + FdwXactMarkForeignTransactionModified(estate->es_result_relation_info, + eflags); + } else fdwroutine->BeginForeignScan(scanstate, eflags); diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index bf0d5e8..283bfaf 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -44,6 +44,8 @@ #include "executor/executor.h" #include "executor/nodeModifyTable.h" #include "foreign/fdwapi.h" +#include "foreign/fdwxact.h" +#include "foreign/foreign.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "storage/bufmgr.h" @@ -2317,6 +2319,9 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) fdw_private, i, eflags); + + /* Mark this transaction modified data on the foreign server */ + FdwXactMarkForeignTransactionModified(resultRelInfo, eflags); } resultRelInfo++; diff --git a/src/backend/foreign/Makefile b/src/backend/foreign/Makefile index 85aa857..4329d3e 100644 --- a/src/backend/foreign/Makefile +++ b/src/backend/foreign/Makefile @@ -12,6 +12,6 @@ subdir = src/backend/foreign top_builddir = ../../.. 
include $(top_builddir)/src/Makefile.global -OBJS= foreign.o +OBJS= foreign.o fdwxact.o fdwxact_launcher.o fdwxact_resolver.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/foreign/fdwxact.c b/src/backend/foreign/fdwxact.c new file mode 100755 index 0000000..d284861 --- /dev/null +++ b/src/backend/foreign/fdwxact.c @@ -0,0 +1,2762 @@ +/*------------------------------------------------------------------------- + * + * fdwxact.c + * PostgreSQL distributed transaction manager for foreign servers. + * + * To achieve commit among all foreign servers atomically, we employ the + * two-phase commit protocol, which is a type of atomic commitment + * protocol (ACP). The basic strategy is that we prepare all of the remote + * transactions before committing locally and commit them after committing + * locally. + * + * When a foreign data wrapper starts a transaction on a foreign server + * that is capable of two-phase commit protocol, it's required to register + * the foreign transaction using function FdwXactRegisterForeignTransaction() in order + * to participate in a group for atomic commit. Participants are identified + * by oid of foreign server and user. When the foreign transaction begins + * to modify data it's required to mark it as modified using + * FdwXactMarkForeignTransactionModified(). + * + * During pre-commit of local transaction, we prepare the transaction on + * every participating foreign server. After committing or rolling back locally, we + * notify the resolver process and tell it to commit or roll back those + * transactions. If we ask it to commit, we also tell it to notify us when + * it's done, so that we can wait interruptibly for it to finish, + * and so that we're not trying to locally do work that might fail with an + * ERROR after the local transaction has already committed. + * + * Two-phase commit protocol is required if the transaction modified + * two or more servers including itself. Otherwise, all foreign transactions + * are committed during pre-commit.
+ * + * If a network failure or server crash occurs, or the user stops waiting, + * prepared foreign transactions are left in an in-doubt state (a.k.a. dangling + * transactions). Dangling transactions are processed by the resolver process. + * + * During WAL replay and replication, FdwXactCtl also holds information about + * active prepared foreign transactions that haven't been moved to disk yet. + * + * Replay of fdwxact records happens by the following rules: + * + * * On PREPARE redo we add the foreign transaction to FdwXactCtl->fdw_xacts. + * We set fdw_xact->inredo to true for such entries. + * * On Checkpoint redo, we iterate through FdwXactCtl->fdw_xacts entries that + * have set fdw_xact->inredo true and are behind the redo_horizon. We save + * them to disk and then set fdw_xact->ondisk to true. + * * On COMMIT and ABORT we delete the entry from FdwXactCtl->fdw_xacts. + * If fdw_xact->ondisk is true, we delete the corresponding file from + * the disk as well. + * * RecoverFdwXacts loads all foreign transaction entries from disk into + * memory at server startup.
+ *
+ * Portions Copyright (c) 2018, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/foreign/fdwxact.c
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include
+#include
+#include
+
+#include "access/htup_details.h"
+#include "access/twophase.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "access/xloginsert.h"
+#include "access/xlogutils.h"
+#include "catalog/pg_type.h"
+#include "foreign/fdwapi.h"
+#include "foreign/fdwxact.h"
+#include "foreign/fdwxact_resolver.h"
+#include "foreign/fdwxact_launcher.h"
+#include "foreign/fdwxact_xlog.h"
+#include "foreign/resolver_internal.h"
+#include "foreign/foreign.h"
+#include "funcapi.h"
+#include "libpq/pqsignal.h"
+#include "miscadmin.h"
+#include "parser/parsetree.h"
+#include "pg_trace.h"
+#include "pgstat.h"
+#include "storage/fd.h"
+#include "storage/ipc.h"
+#include "storage/latch.h"
+#include "storage/lock.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "storage/pmsignal.h"
+#include "storage/shmem.h"
+#include "tcop/tcopprot.h"
+#include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/memutils.h"
+#include "utils/ps_status.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+
+/*
+ * Is atomic commit requested by user?  True only when
+ * foreign_twophase_commit is on and both max_prepared_foreign_xacts and
+ * max_foreign_xact_resolvers are greater than zero.
+ */
+#define AtomicCommitRequested() \
+	(foreign_twophase_commit == true && \
+	 max_prepared_foreign_xacts > 0 && \
+	 max_foreign_xact_resolvers > 0)
+
+/*
+ * Backend-local description of one foreign transaction participant.  One
+ * entry is kept in FdwXactParticipantsForAC per registered (serverid, userid)
+ * pair.
+ */
+typedef struct FdwXactParticipant
+{
+	Oid			serverid;		/* OID of the foreign server */
+	Oid			userid;			/* OID of the user the participant is registered for */
+
+	/*
+	 * Pointer to the FdwXact entry in the shared array
+	 * (FdwXactCtl->fdw_xacts). NULL if this foreign transaction is
+	 * registered but its entry has not been inserted yet.
+	 */
+	FdwXact		fdw_xact;
+	char	   *fdw_xact_id;	/* foreign transaction identifier */
+
+	/* true if this transaction modified data on the foreign server */
+	bool		modified;
+
+	/*
+	 * This is initialized at foreign transaction registration (server, user
+	 * mapping and identifier) and passed to the FDW callbacks below.
+	 */
+	ForeignTransaction foreign_xact;
+
+	/* Callbacks for foreign transaction, copied from the FDW routine at registration */
+	PrepareForeignTransaction_function prepare_foreign_xact;
+	CommitForeignTransaction_function commit_foreign_xact;
+	RollbackForeignTransaction_function rollback_foreign_xact;
+} FdwXactParticipant;
+
+/*
+ * List of foreign transaction participants (FdwXactParticipant) for atomic
+ * commit.  This list has only foreign servers that are capable of two-phase
+ * commit protocol.
+ */
+List *FdwXactParticipantsForAC = NIL;
+
+/*
+ * Entry of the shared hash table FdwXactStateCache.  It tracks all
+ * participants involved with transaction 'xid'.
+ */
+typedef struct FdwXactStateCacheEntry
+{
+	/* Key -- must be first */
+	TransactionId	xid;
+
+	/*
+	 * Head of the chain of FdwXact entries involved with the xid; subsequent
+	 * entries are linked through fxact_next.
+	 */
+	FdwXact		participants;
+} FdwXactStateCacheEntry;
+static HTAB *FdwXactStateCache;
+
+/* Directory where the foreign prepared transaction files will reside */
+#define FDW_XACTS_DIR "pg_fdw_xact"
+
+/*
+ * The name of a foreign prepared transaction file consists of the database
+ * oid, xid, foreign server oid and user oid, each printed as 8 hexadecimal
+ * digits and separated by '_'.
+ *
+ * Since an FdwXact state file is created per foreign transaction in a
+ * distributed transaction and the xid of an unresolved distributed
+ * transaction is never reused, the name is sufficient to ensure
+ * uniqueness.
+ */
+#define FDW_XACT_FILE_NAME_LEN (8 + 1 + 8 + 1 + 8 + 1 + 8)
+#define FdwXactFilePath(path, dbid, xid, serverid, userid)	\
+	snprintf(path, MAXPGPATH, FDW_XACTS_DIR "/%08X_%08X_%08X_%08X", \
+			 dbid, xid, serverid, userid)
+
+static FdwXact FdwXactRegisterFdwXactEntry(TransactionId xid, FdwXactParticipant *fdw_part);
+static void FdwXactPrepareForeignTransactions(void);
+static bool FdwXactResolveForeignTransaction(FdwXact fdw_xact);
+static void FdwXactComputeRequiredXmin(void);
+static void FdwXactQueueInsert(void);
+static void FdwXactCancelWait(void);
+static void FdwXactRedoAdd(char *buf, XLogRecPtr start_lsn, XLogRecPtr end_lsn);
+static void FdwXactRedoRemove(Oid dbid, TransactionId xid, Oid serverid,
+							  Oid userid, bool give_warnings);
+static void AtProcExit_FdwXact(int code, Datum arg);
+static void ForgetAllFdwXactParticipants(void);
+static char *ReadFdwXactFile(Oid dbid, TransactionId xid, Oid serverid,
+							 Oid userid, bool give_warnings);
+static void RemoveFdwXactFile(Oid dbid, TransactionId xid, Oid serverid, Oid userid,
+							  bool giveWarning);
+static void RecreateFdwXactFile(Oid dbid, TransactionId xid, Oid serverid, Oid userid,
+								void *content, int len);
+static void XlogReadFdwXactData(XLogRecPtr lsn, char **buf, int *len);
+static char *ProcessFdwXactBuffer(Oid dbid, TransactionId local_xid, Oid serverid,
+								  Oid userid, XLogRecPtr insert_start_lsn,
+								  bool give_warnings);
+static List *get_fdw_xacts(Oid dbid, TransactionId xid, Oid serverid, Oid userid,
+						   bool need_lock);
+static FdwXact get_one_fdw_xact(Oid dbid, TransactionId xid, Oid serverid, Oid userid,
+								bool need_lock);
+static FdwXact get_all_fdw_xacts(int *length);
+static FdwXact insert_fdw_xact(Oid dbid, TransactionId xid, Oid serverid, Oid userid,
+							   char *fdw_xact_id);
+static char *generate_fdw_xact_identifier(Oid serverid, Oid userid);
+static void remove_fdw_xact(FdwXact fdw_xact);
+
+/* Guc parameters; the defaults below leave atomic commit disabled */
+int	max_prepared_foreign_xacts = 0;
+int	max_foreign_xact_resolvers = 0;
+bool
foreign_twophase_commit = false; + +/* Keep track of registering process exit call back. */ +static bool fdwXactExitRegistered = false; + +/* + * Register given foreign transaction identified by given arguments as + * a participant of the transaction. + * + * This function aimes to be called by FDW when foreign transaction + * starts. The foreign server identified by given server id must + * support atomic commit APIs. The foreign transaction is identified + * by given identifier 'fdw_xact_id' which can be NULL. If it's NULL, + * we construct an unique identifer. + * + * After registered, foreign transaction of participants are managed + * by foreign transaction manager until the end of the distributed + * transaction. + */ +void +FdwXactRegisterForeignTransaction(Oid serverid, Oid userid, char *fx_id) +{ + FdwXactParticipant *fdw_part; + ListCell *lc; + ForeignServer *foreign_server; + ForeignDataWrapper *fdw; + UserMapping *user_mapping; + FdwRoutine *fdw_routine; + MemoryContext old_context; + + /* Check length of foreign transaction identifier */ + if (fx_id != NULL && strlen(fx_id) >= NAMEDATALEN) + ereport(ERROR, + (errcode(ERRCODE_NAME_TOO_LONG), + errmsg("foreign transaction identifer \"%s\" is too long", + fx_id), + errdetail("foreign transaction identifier must be less than %d characters.", + NAMEDATALEN))); + + /* Parameter check */ + if (max_prepared_foreign_xacts == 0) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("prepread foreign transactions are disabled"), + errhint("Set max_prepared_foreign_transactions to a nonzero value."))); + + if (max_foreign_xact_resolvers == 0) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("prepread foreign transactions are disabled"), + errhint("Set max_foreign_xact_resolvers to a nonzero value."))); + + /* Duplication check */ + foreach(lc, FdwXactParticipantsForAC) + { + fdw_part = lfirst(lc); + + /* Quick return if there is already registered connection 
*/ + if (fdw_part->serverid == serverid && fdw_part->userid == userid) + ereport(ERROR, + (errmsg("attempt to start transction again on server %u user %u", + serverid, userid))); + } + + /* + * Participants information is needed at the end of a transaction, when + * system cache are not available. so save it in TopTransactionContext + * before hand so that these can live until the end of transaction. + */ + old_context = MemoryContextSwitchTo(TopTransactionContext); + + foreign_server = GetForeignServer(serverid); + fdw = GetForeignDataWrapper(foreign_server->fdwid); + fdw_routine = GetFdwRoutine(fdw->fdwhandler); + user_mapping = GetUserMapping(userid, serverid); + + /* Make sure that the FDW has transaction handlers */ + if (!fdw_routine->PrepareForeignTransaction) + ereport(ERROR, + (errmsg("no function provided for preparing foreign transaction for FDW %s", + fdw->fdwname))); + if (!fdw_routine->CommitForeignTransaction) + ereport(ERROR, + (errmsg("no function to commit a foreign transaction provided for FDW %s", + fdw->fdwname))); + if (!fdw_routine->RollbackForeignTransaction) + ereport(ERROR, + (errmsg("no function to rollback a foreign transaction provided for FDW %s", + fdw->fdwname))); + + /* Generate foreign transaction identifier if not provided */ + if (fx_id == NULL) + fx_id = generate_fdw_xact_identifier(serverid, userid); + + fdw_part = (FdwXactParticipant *) palloc(sizeof(FdwXactParticipant)); + + fdw_part->serverid = serverid; + fdw_part->userid = userid; + fdw_part->fdw_xact_id = fx_id; + fdw_part->fdw_xact = NULL; + fdw_part->modified = false; /* by default */ + fdw_part->foreign_xact.server = foreign_server; + fdw_part->foreign_xact.usermapping = user_mapping; + fdw_part->foreign_xact.fx_id = fx_id; + fdw_part->prepare_foreign_xact = fdw_routine->PrepareForeignTransaction; + fdw_part->commit_foreign_xact = fdw_routine->CommitForeignTransaction; + fdw_part->rollback_foreign_xact = fdw_routine->RollbackForeignTransaction; + + /* Add this foreign 
connection to the participants list */ + FdwXactParticipantsForAC = lappend(FdwXactParticipantsForAC, fdw_part); + + /* Revert back the context */ + MemoryContextSwitchTo(old_context); + + return; +} + +/* + * Remember the registered foreign transaction modified data . This function + * is called when the executor begins to modify data on a foreign server + * regardless the foreign server is capable of two-phase commit protocol. + * Marking it will be used to determine we must use two-phase commit protocol + * at commit. This function also checks if the begin modified foreign server + * is capable of two-phase commit or not. If it doesn't support, we remember + * it. + */ +void +FdwXactMarkForeignTransactionModified(ResultRelInfo *resultRelInfo, int flags) +{ + Relation rel = resultRelInfo->ri_RelationDesc; + FdwXactParticipant *fdw_part; + ForeignTable *ftable; + ListCell *lc; + Oid userid; + Oid serverid; + + bool found = false; + + /* Quick return if user not request */ + if (!AtomicCommitRequested()) + return; + + /* Do nothing in EXPLAIN (no ANALYZE) case */ + if (flags && EXEC_FLAG_EXPLAIN_ONLY) + return; + + ftable = GetForeignTable(RelationGetRelid(rel)); + + /* + * If the being modified foreign server doesn't or cannot enable + * two-phase commit protocol, mark that we've written such server + * and return. + */ + if (resultRelInfo->ri_FdwRoutine->IsTwoPhaseCommitEnabled == NULL || + !resultRelInfo->ri_FdwRoutine->IsTwoPhaseCommitEnabled(ftable->serverid)) + { + MyXactFlags |= XACT_FLAGS_FDWNOPREPARE; + return; + } + + /* + * The foreign server being modified supports two-phase commit protocol, + * remember that the foreign transaction modified data. + */ + userid = rel->rd_rel->relowner ? 
rel->rd_rel->relowner : GetUserId(); + serverid = ftable->serverid; + foreach(lc, FdwXactParticipantsForAC) + { + fdw_part = lfirst(lc); + + if (fdw_part->serverid == serverid && fdw_part->userid == userid) + { + fdw_part->modified = true; + found = true; + break; + } + } + + if (!found) + elog(ERROR, "attempt to mark unregistered foreign server %u, user %u as modified", + serverid, userid); +} + +/* + * FdwXactShmemSize + * Calculates the size of shared memory allocated for maintaining foreign + * prepared transaction entries. + */ +Size +FdwXactShmemSize(void) +{ + Size size; + + /* Size for foreign transaction information array */ + size = offsetof(FdwXactCtlData, fdw_xacts); + size = add_size(size, mul_size(max_prepared_foreign_xacts, + sizeof(FdwXact))); + size = MAXALIGN(size); + size = add_size(size, mul_size(max_prepared_foreign_xacts, + sizeof(FdwXactData))); + + /* Size for shared cache entry */ + size = MAXALIGN(size); + size = add_size(size, hash_estimate_size(max_prepared_foreign_xacts, + sizeof(FdwXactStateCacheEntry))); + + return size; +} + +/* + * FdwXactShmemInit + * Initialization of shared memory for maintaining foreign prepared transaction + * entries. The shared memory layout is defined in definition of + * FdwXactCtlData structure. 
+ */ +void +FdwXactShmemInit(void) +{ + bool found; + + if (!fdwXactExitRegistered) + { + before_shmem_exit(AtProcExit_FdwXact, 0); + fdwXactExitRegistered = true; + } + + FdwXactCtl = ShmemInitStruct("Foreign transactions table", + FdwXactShmemSize(), + &found); + if (!IsUnderPostmaster) + { + FdwXact fdw_xacts; + HASHCTL info; + long max_hash_size; + int cnt; + + Assert(!found); + FdwXactCtl->freeFdwXacts = NULL; + FdwXactCtl->numFdwXacts = 0; + + /* Initialize the linked list of free FDW transactions */ + fdw_xacts = (FdwXact) + ((char *) FdwXactCtl + + MAXALIGN(offsetof(FdwXactCtlData, fdw_xacts) + + sizeof(FdwXact) * max_prepared_foreign_xacts)); + for (cnt = 0; cnt < max_prepared_foreign_xacts; cnt++) + { + fdw_xacts[cnt].status = FDW_XACT_INITIAL; + fdw_xacts[cnt].fxact_free_next = FdwXactCtl->freeFdwXacts; + FdwXactCtl->freeFdwXacts = &fdw_xacts[cnt]; + } + + /* Initialize shared state cache hash table */ + MemSet(&info, 0, sizeof(info)); + info.keysize = sizeof(TransactionId); + info.entrysize = sizeof(FdwXactStateCacheEntry); + max_hash_size = max_prepared_foreign_xacts; + + FdwXactStateCache = ShmemInitHash("FdwXact hash", + max_hash_size, + max_hash_size, + &info, + HASH_ELEM | HASH_BLOBS); + } + else + { + Assert(FdwXactCtl); + Assert(found); + } +} + +/* + * PreCommit_FdwXacts + * + * This function prepares all foreign transaction participants if atomic commit + * is required. Otherwise commits them without preparing. + * + * If atomic commit is requested by user (that is, foreign_twophase_commit is on), + * every participants must enable two-phase commit. If we manage all foreign + * transactions involving with a transaction we can commit foreign transactions + * on foreign server that doesn't use two-phase commit here and commit others + * at post-commit phase, but we don't do that. 
Because (1) it doesn't satisfy + * the atomic commit semantics at all and (2) it requires all FDWs to register + * foreign server anyway, which breaks backward compatibility. + */ +void +PreCommit_FdwXacts(void) +{ + /* If there are no foreign servers involved, we have no business here */ + if (FdwXactParticipantsForAC == NIL) + return; + + /* + * If user requires the atomic commit semantics, we don't allow COMMIT if we've + * modified data on foreign servers both that can execute two-phase commit + * protocol and that cannot. + */ + if (foreign_twophase_commit == true && MyXactFlags & XACT_FLAGS_FDWNOPREPARE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot COMMIT a distributed transaction that has operated on foreign server that doesn't support atomic commit"))); + + if (ForeignTwophaseCommitRequired()) + { + /* Prepare the transactions on the all foreign servers */ + FdwXactPrepareForeignTransactions(); + } + else + { + ListCell *lc; + + Assert(list_length(FdwXactParticipantsForAC) == 1); + + /* Two-phase commit is not required, commit them one by one */ + foreach(lc, FdwXactParticipantsForAC) + { + FdwXactParticipant *fdw_part = (FdwXactParticipant *) lfirst(lc); + + /* Commit foreign transaction */ + if (!fdw_part->commit_foreign_xact(&fdw_part->foreign_xact)) + ereport(ERROR, + (errmsg("could not commit foreign transaction on server %s", + fdw_part->foreign_xact.server->servername))); + } + + /* Forget all participants */ + FdwXactParticipantsForAC = NIL; + } +} + +/* + * FdwXactPrepareForeignTransactions + * + * Prepare all foreign transaction participants. This function creates a prepared + * participants chain whenever we prepared a foreign transaction. The prepared + * participants chain is used to access all participants of distributed transaction + * quickly. If any one of them fails to prepare or raises an error, we change over + * to aborts. 
+ */
+static void
+FdwXactPrepareForeignTransactions(void)
+{
+	ListCell   *lcell;
+	FdwXact		prev_fxact = NULL;
+
+	/* Loop over the foreign connections */
+	foreach(lcell, FdwXactParticipantsForAC)
+	{
+		FdwXactParticipant *fdw_part = (FdwXactParticipant *) lfirst(lcell);
+		FdwXact		fxact;
+
+		/*
+		 * Register the foreign transaction entry. Registration persists this
+		 * information to the disk and logs (that way relaying it on standby).
+		 * Thus in case we lose connectivity to the foreign server or crash
+		 * ourselves, we will remember that we might have prepared transaction
+		 * on the foreign server and try to resolve it when connectivity is
+		 * restored or after crash recovery.
+		 *
+		 * If we prepare the transaction on the foreign server before persisting
+		 * the information to the disk and crash in-between these two steps,
+		 * we will forget that we prepared the transaction on the foreign server
+		 * and will not be able to resolve it after the crash. Hence persist
+		 * first then prepare.
+		 */
+		fxact = FdwXactRegisterFdwXactEntry(GetTopTransactionId(), fdw_part);
+
+		/*
+		 * Between FdwXactRegisterFdwXactEntry call till this backend hears
+		 * acknowledge from foreign server, the backend may abort the local
+		 * transaction (say, because of a signal). During abort processing,
+		 * we might try to resolve a never-prepared transaction, and get an error.
+		 * This is fine as long as the FDW provides us unique prepared transaction
+		 * identifiers.
+		 */
+		if (!fdw_part->prepare_foreign_xact(&fdw_part->foreign_xact))
+		{
+			/* Failed to prepare, change over aborts */
+			ereport(ERROR,
+					(errmsg("could not prepare transaction on foreign server %s",
+							fdw_part->foreign_xact.server->servername)));
+		}
+
+		/*
+		 * Preparation succeeded.  Update the entry's status and link it into
+		 * the prepared participants chain, which is the list of FdwXact
+		 * entries involved with this transaction: the head entry is
+		 * remembered in the shared hash table and each subsequent entry is
+		 * linked from the previous one.
+		 *
+		 * All of these are writes to shared memory, so do them while holding
+		 * FdwXactLock; in particular the new hash entry must not become
+		 * visible before its 'participants' field is set.  The status is set
+		 * through 'fxact' itself rather than through the participant's
+		 * pointer, which is only assigned on the following line.
+		 */
+		LWLockAcquire(FdwXactLock, LW_EXCLUSIVE);
+		fxact->status = FDW_XACT_PREPARED;
+		fdw_part->fdw_xact = fxact;
+
+		if (prev_fxact == NULL)
+		{
+			FdwXactStateCacheEntry *fxact_entry;
+			bool		found;
+
+			fxact_entry = (FdwXactStateCacheEntry *)
+				hash_search(FdwXactStateCache,
+							(void *) &(fxact->local_xid),
+							HASH_ENTER, &found);
+			Assert(!found);
+
+			/* Set the first participant */
+			fxact_entry->participants = fxact;
+		}
+		else
+		{
+			/* Append others to the tail */
+			Assert(fxact->fxact_next == NULL);
+			prev_fxact->fxact_next = fxact;
+		}
+		LWLockRelease(FdwXactLock);
+
+		prev_fxact = fxact;
+	}
+}
+
+/*
+ * FdwXactRegisterFdwXactEntry
+ *
+ * This function is used to create new foreign transaction entry before an FDW
+ * prepares and commit/rollback. The function adds the entry to WAL and will
+ * be persisted to the disk under pg_fdw_xact directory when checkpoint.
+ */
+static FdwXact
+FdwXactRegisterFdwXactEntry(TransactionId xid, FdwXactParticipant *fdw_part)
+{
+	FdwXact		fxact;
+	FdwXactOnDiskData *fxact_file_data;
+	MemoryContext old_context;
+	int			data_len;
+
+	old_context = MemoryContextSwitchTo(TopTransactionContext);
+
+	/*
+	 * Enter the foreign transaction in the shared memory structure.
+	 */
+	LWLockAcquire(FdwXactLock, LW_EXCLUSIVE);
+	fxact = insert_fdw_xact(MyDatabaseId, xid, fdw_part->serverid,
+							fdw_part->userid, fdw_part->fdw_xact_id);
+	fxact->status = FDW_XACT_PREPARING;
+	fxact->registered_backend = MyBackendId;
+	fdw_part->fdw_xact = fxact;
+	LWLockRelease(FdwXactLock);
+
+	MemoryContextSwitchTo(old_context);
+
+	/*
+	 * Build the image of the entry that is written to WAL: the fixed part of
+	 * FdwXactOnDiskData up to fdw_xact_id, followed by the NUL-terminated
+	 * identifier, padded to MAXALIGN. The contents of the xlog record are
+	 * same as what is written to the file.
+	 */
+	data_len = offsetof(FdwXactOnDiskData, fdw_xact_id);
+	data_len = data_len + strlen(fdw_part->fdw_xact_id) + 1;
+	data_len = MAXALIGN(data_len);
+	fxact_file_data = (FdwXactOnDiskData *) palloc0(data_len);
+	fxact_file_data->dbid = MyDatabaseId;
+	fxact_file_data->local_xid = xid;
+	fxact_file_data->serverid = fdw_part->serverid;
+	fxact_file_data->userid = fdw_part->userid;
+	memcpy(fxact_file_data->fdw_xact_id, fdw_part->fdw_xact_id,
+		   strlen(fdw_part->fdw_xact_id) + 1);
+
+	/* Hold off checkpoints until the entry is marked valid; see note in RecordTransactionCommit */
+	MyPgXact->delayChkpt = true;
+
+	START_CRIT_SECTION();
+
+	/* Add the entry in the xlog, flush it, and save the end LSN for checkpointer */
+	XLogBeginInsert();
+	XLogRegisterData((char *) fxact_file_data, data_len);
+	fxact->insert_end_lsn = XLogInsert(RM_FDW_XACT_ID, XLOG_FDW_XACT_INSERT);
+	XLogFlush(fxact->insert_end_lsn);
+
+	/* If we crash now, we have prepared: WAL replay will fix things */
+
+	/* Store record's start location to read that later on CheckPoint */
+	fxact->insert_start_lsn = ProcLastRecPtr;
+
+	/*
+	 * The WAL record is flushed; mark the entry valid under the lock.
+	 * NOTE(review): nothing is written to a file here; presumably the
+	 * checkpointer uses 'valid' to pick entries to sync -- confirm.
+	 */
+	LWLockAcquire(FdwXactLock, LW_EXCLUSIVE);
+	fxact->valid = true;
+	LWLockRelease(FdwXactLock);
+
+	/* Checkpoint can proceed now */
+	MyPgXact->delayChkpt = false;
+
+	END_CRIT_SECTION();
+
+	pfree(fxact_file_data);
+	return fxact;
+}
+
+/*
+ * insert_fdw_xact
+ *
+ * Insert a new entry for a given foreign transaction identified by database
+ * id, transaction id, foreign server and user, into the shared memory array,
+ * taking a slot from the free list. Caller must hold FdwXactLock in
+ * exclusive mode.
+ *
+ * If the entry already exists, or if no free slot is left, the function
+ * raises an error.
+ *
+ * The returned entry has valid, ondisk and inredo set to false, both insert
+ * LSNs invalid and no registered backend; its status is not set here and is
+ * left to the caller.
+ */
+static FdwXact
+insert_fdw_xact(Oid dbid, TransactionId xid, Oid serverid, Oid userid,
+				char *fdw_xact_id)
+{
+	int			i;
+	FdwXact		fxact;
+
+	Assert(LWLockHeldByMeInMode(FdwXactLock, LW_EXCLUSIVE));
+
+	/* Check for duplicated foreign transaction entry (linear scan of active entries) */
+	for (i = 0; i < FdwXactCtl->numFdwXacts; i++)
+	{
+		fxact = FdwXactCtl->fdw_xacts[i];
+		if (fxact->dbid == dbid &&
+			fxact->local_xid == xid &&
+			fxact->serverid == serverid &&
+			fxact->userid == userid)
+			ereport(ERROR, (errmsg("could not insert a foreign transaction entry"),
+							errdetail("duplicate entry with transaction id %u, serverid %u, userid %u",
+									  xid, serverid, userid)));
+	}
+
+	/*
+	 * Get a next free foreign transaction entry. Raise error if there are
+	 * none left.
+	 */
+	if (!FdwXactCtl->freeFdwXacts)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("maximum number of foreign transactions reached"),
+				 errhint("Increase max_prepared_foreign_transactions: \"%d\".",
+						 max_prepared_foreign_xacts)));
+	}
+	fxact = FdwXactCtl->freeFdwXacts;
+	FdwXactCtl->freeFdwXacts = fxact->fxact_free_next;
+
+	/* Insert the entry to shared memory array */
+	Assert(FdwXactCtl->numFdwXacts < max_prepared_foreign_xacts);
+	FdwXactCtl->fdw_xacts[FdwXactCtl->numFdwXacts++] = fxact;
+
+	fxact->registered_backend = InvalidBackendId;
+	fxact->dbid = dbid;
+	fxact->local_xid = xid;
+	fxact->serverid = serverid;
+	fxact->userid = userid;
+	fxact->insert_start_lsn = InvalidXLogRecPtr;
+	fxact->insert_end_lsn = InvalidXLogRecPtr;
+	fxact->valid = false;
+	fxact->ondisk = false;
+	fxact->inredo = false;
+	memcpy(fxact->fdw_xact_id, fdw_xact_id, strlen(fdw_xact_id) + 1);	/* NOTE(review): assumes the id fits the entry's buffer -- confirm */
+
+	return fxact;
+}
+
+/*
+ * remove_fdw_xact
+ *
+ * Remove the foreign prepared transaction entry from shared memory.
+ * Caller must hold FdwXactLock in exclusive mode.
+ */
+static void
+remove_fdw_xact(FdwXact fdw_xact)
+{
+	xl_fdw_xact_remove xlrec;
+	XLogRecPtr	end_lsn;
+	int			slot = 0;
+
+	Assert(fdw_xact != NULL);
+	Assert(LWLockHeldByMeInMode(FdwXactLock, LW_EXCLUSIVE));
+
+	/* Locate the array slot that currently holds this entry */
+	while (slot < FdwXactCtl->numFdwXacts &&
+		   FdwXactCtl->fdw_xacts[slot] != fdw_xact)
+		slot++;
+
+	/* Ran off the end: the entry is not in the active array */
+	if (slot >= FdwXactCtl->numFdwXacts)
+		ereport(ERROR,
+				(errmsg("could not remove a foreign transaction entry"),
+				 errdetail("failed to find entry for xid %u, foreign server %u, and user %u",
+						   fdw_xact->local_xid, fdw_xact->serverid, fdw_xact->userid)));
+
+	/* Fill the hole with the last active entry and shrink the array */
+	FdwXactCtl->numFdwXacts--;
+	FdwXactCtl->fdw_xacts[slot] = FdwXactCtl->fdw_xacts[FdwXactCtl->numFdwXacts];
+
+	/* Push the entry onto the head of the free list */
+	fdw_xact->fxact_free_next = FdwXactCtl->freeFdwXacts;
+	FdwXactCtl->freeFdwXacts = fdw_xact;
+
+	/* Clear per-use state; the identifying fields are left untouched */
+	fdw_xact->status = FDW_XACT_INITIAL;
+	fdw_xact->registered_backend = InvalidBackendId;
+	fdw_xact->fxact_next = NULL;
+
+	/* No WAL record is written while recovery is in progress */
+	if (RecoveryInProgress())
+		return;
+
+	/* Build the removal record from the (still intact) identifying fields */
+	xlrec.serverid = fdw_xact->serverid;
+	xlrec.dbid = fdw_xact->dbid;
+	xlrec.xid = fdw_xact->local_xid;
+	xlrec.userid = fdw_xact->userid;
+
+	/*
+	 * delayChkpt is raised around the insert-and-flush so that a checkpoint
+	 * cannot start between writing the record and flushing it.  (This is
+	 * essentially the same kind of race condition as the
+	 * COMMIT-to-clog-write case that RecordTransactionCommit uses
+	 * delayChkpt for; see notes there.)
+	 */
+	START_CRIT_SECTION();
+
+	MyPgXact->delayChkpt = true;
+
+	/* Log the removal of the foreign transaction entry and flush it */
+	XLogBeginInsert();
+	XLogRegisterData((char *) &xlrec, sizeof(xl_fdw_xact_remove));
+	end_lsn = XLogInsert(RM_FDW_XACT_ID, XLOG_FDW_XACT_REMOVE);
+	XLogFlush(end_lsn);
+
+	/* The record is durable; checkpoints may proceed again */
+	MyPgXact->delayChkpt = false;
+
+	END_CRIT_SECTION();
+}
+
+/*
+ * Return true if the current transaction requires foreign two-phase commit
+ * to achieve atomic commit.
+ *
+ * False is returned when atomic commit is not requested or when no
+ * participant modified data.  Otherwise the result is true when the number
+ * of registered participants, plus one if a non-temporary local relation
+ * was written, exceeds one.
+ */
+bool
+ForeignTwophaseCommitRequired(void)
+{
+	ListCell   *cell;
+	bool		wrote_foreign = false;
+	int			nwriters;
+
+	/* Nothing to decide unless atomic commit was asked for */
+	if (!AtomicCommitRequested())
+		return false;
+
+	/* Did any participant modify data on its foreign server? */
+	foreach(cell, FdwXactParticipantsForAC)
+	{
+		FdwXactParticipant *part = (FdwXactParticipant *) lfirst(cell);
+
+		if (part->modified)
+		{
+			wrote_foreign = true;
+			break;
+		}
+	}
+
+	if (!wrote_foreign)
+		return false;
+
+	/* Every registered participant counts, plus the local server if written */
+	nwriters = list_length(FdwXactParticipantsForAC);
+	if ((MyXactFlags & XACT_FLAGS_WROTENONTEMPREL) != 0)
+		nwriters++;
+
+	return nwriters > 1;
+}
+
+/*
+ * Compute the oldest xmin across all unresolved foreign transactions
+ * and store it in the ProcArray.
+ */
+static void
+FdwXactComputeRequiredXmin(void)
+{
+	int			i;
+	TransactionId agg_xmin = InvalidTransactionId;
+
+	Assert(FdwXactCtl != NULL);
+
+	LWLockAcquire(FdwXactLock, LW_SHARED);
+
+	for (i = 0; i < FdwXactCtl->numFdwXacts; i++)
+	{
+		FdwXact		fdwxact = FdwXactCtl->fdw_xacts[i];
+
+		/* Entries that are not yet marked valid are ignored */
+		if (!fdwxact->valid)
+			continue;
+
+		Assert(TransactionIdIsValid(fdwxact->local_xid));
+
+		if (!TransactionIdIsValid(agg_xmin) ||
+			TransactionIdPrecedes(fdwxact->local_xid, agg_xmin))
+			agg_xmin = fdwxact->local_xid;
+	}
+
+	LWLockRelease(FdwXactLock);
+
+	ProcArraySetFdwXactUnresolvedXmin(agg_xmin);
+}
+
+/*
+ * ForgetAllFdwXactParticipants
+ *
+ * Reset all the foreign transaction entries that this backend registered.
+ * If the foreign transaction has the corresponding FdwXact entry, resetting
+ * the registered_backend field means to leave that entry in unresolved state.
+ * If we leave any entries, we update the oldest xmin of unresolved
+ * transactions so that the transaction status of dangling transactions is
+ * not truncated.
+ */
+static void
+ForgetAllFdwXactParticipants(void)
+{
+	ListCell   *cell;
+	int			n_left = 0;
+
+	if (FdwXactParticipantsForAC == NIL)
+		return;
+
+	LWLockAcquire(FdwXactLock, LW_EXCLUSIVE);
+
+	foreach(cell, FdwXactParticipantsForAC)
+	{
+		FdwXactParticipant *fdw_part = (FdwXactParticipant *) lfirst(cell);
+
+		/* Skip if didn't register FdwXact entry yet */
+		if (fdw_part->fdw_xact == NULL)
+			continue;
+
+		/*
+		 * There is a race condition; the entries of FdwXactParticipantsForAC
+		 * could be used by other backend before we forget in case where the
+		 * resolver process removes the FdwXact entry and other backend reuses
+		 * it before we forget them. So we need to check if the entries are
+		 * still associated with the transaction.
+		 */
+		if (fdw_part->fdw_xact->registered_backend == MyBackendId)
+		{
+			fdw_part->fdw_xact->registered_backend = InvalidBackendId;
+			n_left++;
+		}
+	}
+
+	LWLockRelease(FdwXactLock);
+
+	/*
+	 * Update the oldest local transaction of unresolved distributed
+	 * transaction if we left any FdwXact entries.
+	 */
+	if (n_left > 0)
+		FdwXactComputeRequiredXmin();
+
+	FdwXactParticipantsForAC = NIL;
+}
+
+/*
+ * AtProcExit_FdwXact
+ *
+ * When the process exits, forget all the entries.
+ */
+static void
+AtProcExit_FdwXact(int code, Datum arg)
+{
+	ForgetAllFdwXactParticipants();
+}
+
+/*
+ * Wait for foreign transaction to be resolved.
+ *
+ * Initially backends start in state FDW_XACT_NOT_WAITING and then change
+ * that state to FDW_XACT_WAITING before adding ourselves to the wait queue.
+ * During FdwXactResolveForeignTransactions a fdwxact resolver changes the
+ * state to FDW_XACT_WAIT_COMPLETE once foreign transactions are resolved.
+ * This backend then resets its state to FDW_XACT_NOT_WAITING.
+ *
+ * wait_xid is the local transaction whose foreign transactions we wait for;
+ * is_commit selects whether still-undecided prepared entries are marked for
+ * commit or for abort.  The function returns immediately when atomic commit
+ * is not requested or when there is nothing to resolve.
+ *
+ * This function is inspired by SyncRepWaitForLSN.
+ */
+void
+FdwXactWaitToBeResolved(TransactionId wait_xid, bool is_commit)
+{
+	char	   *new_status = NULL;
+	const char *old_status;
+	ListCell   *lc;
+	List	   *fdwxact_participants = NIL;
+
+	/* Quick exit if atomic commit is not requested */
+	if (!AtomicCommitRequested())
+		return;
+
+	Assert(FdwXactCtl != NULL);
+	Assert(TransactionIdIsValid(wait_xid));
+	Assert(SHMQueueIsDetached(&(MyProc->fdwXactLinks)));
+	Assert(MyProc->fdwXactState == FDW_XACT_NOT_WAITING);
+
+	if (FdwXactParticipantsForAC != NIL)
+	{
+		/*
+		 * If we're waiting for foreign transactions to be resolved that
+		 * we've prepared just before, use the participants list.
+		 *
+		 * That list holds FdwXactParticipant structs, not FdwXact entries,
+		 * so collect the FdwXact entry of every participant that has
+		 * registered one rather than reinterpreting the list cells.
+		 */
+		Assert(MyPgXact->xid == wait_xid);
+
+		foreach(lc, FdwXactParticipantsForAC)
+		{
+			FdwXactParticipant *fdw_part = (FdwXactParticipant *) lfirst(lc);
+
+			if (fdw_part->fdw_xact != NULL)
+				fdwxact_participants = lappend(fdwxact_participants,
+											   fdw_part->fdw_xact);
+		}
+	}
+	else
+	{
+		FdwXactStateCacheEntry *fdwxact_entry;
+		bool		found;
+
+		/*
+		 * If we're waiting for foreign transactions that are part of a
+		 * local prepared transaction which was prepared while this server
+		 * has been running, the entries exist in the hash table, so we
+		 * construct the participants list from the hash entry.
+		 */
+		Assert(FdwXactStateCache);
+		fdwxact_entry = (FdwXactStateCacheEntry *) hash_search(FdwXactStateCache,
+																(void *) &wait_xid,
+																HASH_FIND, &found);
+
+		if (found)
+		{
+			FdwXact		fdwxact;
+
+			for (fdwxact = fdwxact_entry->participants;
+				 fdwxact != NULL;
+				 fdwxact = fdwxact->fxact_next)
+				fdwxact_participants = lappend(fdwxact_participants, fdwxact);
+		}
+	}
+
+	/*
+	 * Otherwise, construct the participants list by scanning the global
+	 * array. This can happen in the case where we restarted after PREPARE'd
+	 * a distributed transaction and then are trying to resolve it.
+	 */
+	if (fdwxact_participants == NIL)
+		fdwxact_participants = get_fdw_xacts(MyDatabaseId, wait_xid,
+											 InvalidOid, InvalidOid, true);
+
+	/*
+	 * Exit if we found no foreign transaction to resolve.  Forget our
+	 * participants first (a no-op when there are none) so that this exit
+	 * leaves the same state behind as the normal one below.
+	 */
+	if (fdwxact_participants == NIL)
+	{
+		ForgetAllFdwXactParticipants();
+		return;
+	}
+
+	LWLockAcquire(FdwXactLock, LW_EXCLUSIVE);
+
+	foreach(lc, fdwxact_participants)
+	{
+		FdwXact		fdw_xact = (FdwXact) lfirst(lc);
+
+		/* Don't overwrite status if fate has been determined */
+		if (fdw_xact->status == FDW_XACT_PREPARED)
+			fdw_xact->status = (is_commit ?
+								FDW_XACT_COMMITTING_PREPARED :
+								FDW_XACT_ABORTING_PREPARED);
+	}
+
+	/* Set backend status and enqueue itself */
+	MyProc->fdwXactState = FDW_XACT_WAITING;
+	MyProc->fdwXactWaitXid = wait_xid;
+	FdwXactQueueInsert();
+	LWLockRelease(FdwXactLock);
+
+	/* Launch a resolver process if not yet, or wake it up */
+	fdwxact_maybe_launch_resolver(false);
+
+	/*
+	 * Alter ps display to show waiting for foreign transaction
+	 * resolution.
+	 */
+	if (update_process_title)
+	{
+		int			len;
+
+		/*
+		 * " waiting for resolution " is 24 bytes and a 32-bit transaction id
+		 * prints as at most 10 digits, so 40 bytes leave room for the suffix
+		 * and its terminating NUL.
+		 */
+		const int	suffix_size = 40;
+
+		old_status = get_ps_display(&len);
+		new_status = (char *) palloc(len + suffix_size);
+		memcpy(new_status, old_status, len);
+		snprintf(new_status + len, suffix_size,
+				 " waiting for resolution %u", wait_xid);
+		set_ps_display(new_status, false);
+		new_status[len] = '\0'; /* truncate off "waiting ..." */
+	}
+
+	/* Wait for all foreign transactions to be resolved */
+	for (;;)
+	{
+		/* Must reset the latch before testing state */
+		ResetLatch(MyLatch);
+
+		/*
+		 * Acquiring the lock is not needed, the latch ensures proper
+		 * barriers. If it looks like we're done, we must really be done,
+		 * because once the resolver changes the state to
+		 * FDW_XACT_WAIT_COMPLETE, it will never update it again, so we
+		 * can't be seeing a stale value in that case.
+		 */
+		if (MyProc->fdwXactState == FDW_XACT_WAIT_COMPLETE)
+			break;
+
+		/*
+		 * If a wait for foreign transaction resolution is pending, we can
+		 * neither acknowledge the commit nor raise ERROR or FATAL. The latter
+		 * would lead the client to believe that the distributed transaction
+		 * aborted, which is not true: it's already committed locally. The
+		 * former is no good either: the client has requested committing a
+		 * distributed transaction, and is entitled to assume that an
+		 * acknowledged commit is also committed on all foreign servers, which
+		 * might not be true. So in this case we issue a WARNING (which some
+		 * clients may be able to interpret) and shut off further output. We
+		 * do NOT reset ProcDiePending, so that the process will die after the
+		 * commit is cleaned up.
+		 */
+		if (ProcDiePending)
+		{
+			ereport(WARNING,
+					(errcode(ERRCODE_ADMIN_SHUTDOWN),
+					 errmsg("canceling the wait for resolving foreign transaction and terminating connection due to administrator command"),
+					 errdetail("The transaction has already committed locally, but might not have been committed on the foreign server.")));
+			whereToSendOutput = DestNone;
+			FdwXactCancelWait();
+			break;
+		}
+
+		/*
+		 * If a query cancel interrupt arrives we just terminate the wait with
+		 * a suitable warning. The foreign transactions can be orphaned but
+		 * the foreign xact resolver can pick them up and try to resolve them
+		 * later.
+		 */
+		if (QueryCancelPending)
+		{
+			QueryCancelPending = false;
+			ereport(WARNING,
+					(errmsg("canceling wait for resolving foreign transaction due to user request"),
+					 errdetail("The transaction has already committed locally, but might not have been committed on the foreign server.")));
+			FdwXactCancelWait();
+			break;
+		}
+
+		/*
+		 * If the postmaster dies, we'll probably never get an
+		 * acknowledgement, because the resolver processes will exit. So
+		 * just bail out.
+		 */
+		if (!PostmasterIsAlive())
+		{
+			ProcDiePending = true;
+			whereToSendOutput = DestNone;
+			FdwXactCancelWait();
+			break;
+		}
+
+		/*
+		 * Wait on latch. Any condition that should wake us up will set the
+		 * latch, so no need for timeout.
+		 */
+		WaitLatch(MyLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH, -1,
+				  WAIT_EVENT_FDW_XACT_RESOLUTION);
+	}
+
+	pg_read_barrier();
+
+	Assert(SHMQueueIsDetached(&(MyProc->fdwXactLinks)));
+	MyProc->fdwXactState = FDW_XACT_NOT_WAITING;
+
+	/*
+	 * Forget the list of locked entries; this also means that the entries
+	 * that could not be resolved remain as dangling transactions.
+	 */
+	ForgetAllFdwXactParticipants();
+
+	/* Restore the ps display saved before the wait */
+	if (new_status)
+	{
+		set_ps_display(new_status, false);
+		pfree(new_status);
+	}
+}
+
+/*
+ * Acquire FdwXactLock and cancel any wait currently in progress.
+ */
+static void
+FdwXactCancelWait(void)
+{
+	LWLockAcquire(FdwXactLock, LW_EXCLUSIVE);
+	if (!SHMQueueIsDetached(&(MyProc->fdwXactLinks)))
+		SHMQueueDelete(&(MyProc->fdwXactLinks));
+	MyProc->fdwXactState = FDW_XACT_NOT_WAITING;
+	LWLockRelease(FdwXactLock);
+}
+
+/*
+ * Insert MyProc into the tail of FdwXactQueue.
+ */
+static void
+FdwXactQueueInsert(void)
+{
+	SHMQueueInsertBefore(&(FdwXactRslvCtl->FdwXactQueue),
+						 &(MyProc->fdwXactLinks));
+}
+
+/*
+ * Detach MyProc from the FdwXactQueue, if it is still linked into it.
+ */
+void
+FdwXactCleanupAtProcExit(void)
+{
+	/* Unlocked pre-check: skip taking the lock when clearly detached */
+	if (!SHMQueueIsDetached(&(MyProc->fdwXactLinks)))
+	{
+		LWLockAcquire(FdwXactLock, LW_EXCLUSIVE);
+
+		/*
+		 * A resolver may have removed us from the queue between the check
+		 * above and acquiring the lock, so recheck before deleting.
+		 */
+		if (!SHMQueueIsDetached(&(MyProc->fdwXactLinks)))
+			SHMQueueDelete(&(MyProc->fdwXactLinks));
+
+		LWLockRelease(FdwXactLock);
+	}
+}
+
+/*
+ * Create and initialize an FdwXactResolveState which is used
+ * for resolution of foreign transactions.  The state is bound to the
+ * current database and starts with no waiter and no entry in progress.
+ */
+FdwXactResolveState *
+CreateFdwXactResolveState(void)
+{
+	FdwXactResolveState *frstate = palloc0(sizeof(FdwXactResolveState));
+
+	frstate->dbid = MyDatabaseId;
+	frstate->fdwxact = NULL;
+	frstate->waiter = NULL;
+
+	return frstate;
+}
+
+/*
+ * Resolve one distributed transaction. The target distributed transaction
+ * is fetched from shmem queue and its participants are fetched from either
+ * shmem hash table or global array. Release the waiter and return true only
+ * if we resolved the all of the foreign transaction participants. Return
+ * false if we failed to resolve any of them.
+ *
+ * To ensure the order of registered distributed transaction to the queue, we
+ * must not go the next distributed transaction until all of participants are
+ * resolved. The failed foreign transactions will be retried at the next execution.
+ */
+bool
+FdwXactResolveDistributedTransaction(FdwXactResolveState *frstate)
+{
+	FdwXactStateCacheEntry *fdwxact_entry = NULL;
+	volatile FdwXact fdwxacts_failed_to_resolve = NULL;
+	bool		all_resolved = false;
+
+	Assert(frstate->dbid == MyDatabaseId);
+
+	/* Get a new waiter, if not exists */
+	if (frstate->waiter == NULL)
+	{
+		PGPROC	   *proc;
+
+		LWLockAcquire(FdwXactLock, LW_SHARED);
+
+		/*
+		 * Walk the queue from its head looking for the first waiter on our
+		 * database.  Each step must start from the current element;
+		 * restarting from the queue head would return the same proc forever
+		 * when the first waiter belongs to another database.
+		 */
+		proc = (PGPROC *) SHMQueueNext(&(FdwXactRslvCtl->FdwXactQueue),
+									   &(FdwXactRslvCtl->FdwXactQueue),
+									   offsetof(PGPROC, fdwXactLinks));
+		while (proc != NULL)
+		{
+			/* Found a waiter */
+			if (proc->databaseId == frstate->dbid)
+				break;
+
+			proc = (PGPROC *) SHMQueueNext(&(FdwXactRslvCtl->FdwXactQueue),
+										   &(proc->fdwXactLinks),
+										   offsetof(PGPROC, fdwXactLinks));
+		}
+
+		LWLockRelease(FdwXactLock);
+
+		/* If no waiter, there is no job */
+		if (!proc)
+			return false;
+
+		Assert(TransactionIdIsValid(proc->fdwXactWaitXid));
+		frstate->waiter = proc;
+	}
+
+	/* Get foreign transaction participants */
+	if (frstate->fdwxact == NULL)
+	{
+		bool		found;
+
+		LWLockAcquire(FdwXactLock, LW_EXCLUSIVE);
+
+		/* Search FdwXact entries from the hash table by the local transaction id */
+		fdwxact_entry =
+			(FdwXactStateCacheEntry *) hash_search(FdwXactStateCache,
+												   (void *) &(frstate->waiter->fdwXactWaitXid),
+												   HASH_FIND, &found);
+
+		if (found)
+			frstate->fdwxact = fdwxact_entry->participants;
+		else
+		{
+			int			i;
+			FdwXact		entries_to_resolve = NULL;
+			FdwXact		prev_fx = NULL;
+
+			/*
+			 * The fdwxact entry doesn't exist in the hash table in case where
+			 * a prepared transaction is resolved after recovery. In this case,
+			 * we construct a list of fdw xact entries by scanning over the
+			 * FdwXactCtl->fdw_xacts list.
+			 */
+			for (i = 0; i < FdwXactCtl->numFdwXacts; i++)
+			{
+				FdwXact		fdw_xact = FdwXactCtl->fdw_xacts[i];
+
+				if (fdw_xact->dbid == frstate->dbid &&
+					fdw_xact->local_xid == frstate->waiter->fdwXactWaitXid)
+				{
+					if (!entries_to_resolve)
+						entries_to_resolve = fdw_xact;
+
+					/* Link from previous entry to this entry */
+					if (prev_fx)
+						prev_fx->fxact_next = fdw_xact;
+
+					prev_fx = fdw_xact;
+				}
+			}
+
+			/* Terminate the list so a stale link is never followed */
+			if (prev_fx)
+				prev_fx->fxact_next = NULL;
+
+			frstate->fdwxact = entries_to_resolve;
+		}
+
+		LWLockRelease(FdwXactLock);
+	}
+
+	Assert(frstate->fdwxact != NULL);
+
+	/* Resolve all foreign transactions one by one */
+	while (frstate->fdwxact != NULL)
+	{
+		volatile FdwXact cur_fdwxact = frstate->fdwxact;
+		volatile FdwXact fdwxact_next = NULL;
+		TransactionId cur_xid;
+		Oid			cur_serverid;
+		Oid			cur_userid;
+
+		/*
+		 * Remember the next FdwXact entry to resolve as the current entry will
+		 * be removed after resolved from the list.  For the same reason copy
+		 * out the fields we want to report afterwards.
+		 */
+		fdwxact_next = cur_fdwxact->fxact_next;
+		cur_xid = cur_fdwxact->local_xid;
+		cur_serverid = cur_fdwxact->serverid;
+		cur_userid = cur_fdwxact->userid;
+
+		/* Resolve a foreign transaction */
+		if (!FdwXactResolveForeignTransaction(cur_fdwxact))
+		{
+			ForeignServer *fserver;
+
+			CHECK_FOR_INTERRUPTS();
+
+			/* Failed to resolve. Remember it for the next execution */
+			LWLockAcquire(FdwXactLock, LW_EXCLUSIVE);
+
+			/*
+			 * The failed entry becomes the tail of the retry list, so cut
+			 * its old link; otherwise the retry list could lead into entries
+			 * that get resolved and removed later in this loop.
+			 */
+			cur_fdwxact->fxact_next = NULL;
+
+			if (fdwxacts_failed_to_resolve == NULL)
+			{
+				/* The first failed entry becomes the head of the list */
+				fdwxacts_failed_to_resolve = cur_fdwxact;
+			}
+			else
+			{
+				FdwXact		fx = fdwxacts_failed_to_resolve;
+
+				/* Append the entry at the tail */
+				while (fx->fxact_next != NULL)
+					fx = fx->fxact_next;
+				fx->fxact_next = cur_fdwxact;
+			}
+			LWLockRelease(FdwXactLock);
+
+			fserver = GetForeignServer(cur_serverid);
+			ereport(LOG,
+					(errmsg("could not resolve a foreign transaction on server \"%s\"",
+							fserver->servername),
+					 errdetail("local transaction id is %u, connected by user id %u",
+							   cur_xid, cur_userid)));
+		}
+		else
+		{
+			/* Resolved. Update the cache entry if it's valid */
+			if (fdwxact_entry)
+				fdwxact_entry->participants = fdwxact_next;
+
+			elog(DEBUG2, "resolved a foreign transaction xid %u, serverid %u, userid %u",
+				 cur_xid, cur_serverid, cur_userid);
+		}
+
+		/* Advance the resolution status to the next */
+		frstate->fdwxact = fdwxact_next;
+	}
+
+	all_resolved = (fdwxacts_failed_to_resolve == NULL);
+
+	if (all_resolved)
+	{
+		LWLockAcquire(FdwXactLock, LW_EXCLUSIVE);
+
+		/* Remove the state cache entry from shmem hash table */
+		hash_search(FdwXactStateCache, (void *) &(frstate->waiter->fdwXactWaitXid),
+					HASH_REMOVE, NULL);
+
+		/*
+		 * Remove waiter from shmem queue, if not detached yet. The waiter
+		 * could already be detached if user cancelled to wait before
+		 * resolution.
+		 */
+		if (!SHMQueueIsDetached(&(frstate->waiter->fdwXactLinks)))
+		{
+			TransactionId wait_xid = frstate->waiter->fdwXactWaitXid;
+
+			SHMQueueDelete(&(frstate->waiter->fdwXactLinks));
+
+			pg_write_barrier();
+
+			/* Set state to complete */
+			frstate->waiter->fdwXactState = FDW_XACT_WAIT_COMPLETE;
+
+			/* Wake up the waiter only when we have set state and removed from queue */
+			SetLatch(&(frstate->waiter->procLatch));
+
+			elog(DEBUG2, "released a proc xid %u", wait_xid);
+		}
+
+		LWLockRelease(FdwXactLock);
+
+		/* Reset resolution state */
+		frstate->waiter = NULL;
+		Assert(frstate->fdwxact == NULL);
+	}
+	else
+	{
+		/*
+		 * Update the fdwxact entry we're processing so that the failed
+		 * fdwxact entries will be processed again.
+		 */
+		frstate->fdwxact = fdwxacts_failed_to_resolve;
+	}
+
+	return all_resolved;
+}
+
+/*
+ * Resolve all dangling foreign transactions on the given database. Get
+ * all dangling foreign transactions from shmem global array and resolve
+ * them one by one.
+ *
+ * Unlike FdwXactResolveDistributedTransaction, for dangling transaction
+ * resolution, we don't bother the order of resolution because these entries
+ * already got out of order.
So if failed to resolve a foreign transaction,
+ * we can go to the next foreign transaction that might be associated with
+ * another distributed transaction.
+ */
+void
+FdwXactResolveAllDanglingTransactions(Oid dbid)
+{
+	List	   *dangling_fdwxacts = NIL;
+	ListCell   *cell;
+	int			n_resolved = 0;	/* a counter, so it must not be a bool */
+	int			i;
+
+	Assert(OidIsValid(dbid));
+
+	LWLockAcquire(FdwXactLock, LW_SHARED);
+
+	/*
+	 * Walk over the global array to make the list of dangling transactions
+	 * of which corresponding local transaction is on the given database.
+	 */
+	for (i = 0; i < FdwXactCtl->numFdwXacts; i++)
+	{
+		FdwXact		fxact = FdwXactCtl->fdw_xacts[i];
+
+		/*
+		 * Append the fdwxact entry on the given database to the list if
+		 * it's handled by nobody and the corresponding local transaction
+		 * is not part of the prepared transaction.
+		 */
+		if (fxact->dbid == dbid &&
+			fxact->registered_backend == InvalidBackendId &&
+			!TwoPhaseExists(fxact->local_xid))
+			dangling_fdwxacts = lappend(dangling_fdwxacts, fxact);
+	}
+
+	LWLockRelease(FdwXactLock);
+
+	/* Return if there is no foreign transaction we need to resolve */
+	if (dangling_fdwxacts == NIL)
+		return;
+
+	foreach(cell, dangling_fdwxacts)
+	{
+		FdwXact		fdwxact = (FdwXact) lfirst(cell);
+
+		if (!FdwXactResolveForeignTransaction(fdwxact))
+		{
+			ForeignServer *fserver = GetForeignServer(fdwxact->serverid);
+
+			/*
+			 * If failed to resolve this foreign transaction we skip it in
+			 * this resolution cycle. Try to resolve again in next cycle.
+			 */
+			ereport(LOG,
+					(errmsg("could not resolve a dangling foreign transaction on server \"%s\"",
+							fserver->servername),
+					 errdetail("local transaction id is %u, connected by user id %u",
+							   fdwxact->local_xid, fdwxact->userid)));
+			continue;
+		}
+
+		n_resolved++;
+	}
+
+	list_free(dangling_fdwxacts);
+
+	elog(DEBUG2, "resolved %d dangling foreign xacts", n_resolved);
+}
+
+/*
+ * AtEOXact_FdwXacts
+ *
+ * In commit case, we have already prepared transactions on the foreign
+ * servers during pre-commit.
And that prepared transactions will be
+ * resolved by the resolver process. So we don't do anything about the
+ * foreign transaction.
+ *
+ * In abort case, user requested rollback or we changed over rollback
+ * due to error during commit. To close current foreign transaction anyway
+ * we call rollback API to every foreign transaction. If we raised an error
+ * during preparing and came to here, it's possible that some entries of
+ * FdwXactParticipants already registered its FdwXact entry. If there are
+ * any, we leave them as dangling transactions and ask the resolver process
+ * to process them.
+ *
+ * In both cases the participants list is forgotten before returning.
+ */
+void
+AtEOXact_FdwXacts(bool is_commit)
+{
+	ListCell   *lcell;
+
+	if (!is_commit)
+	{
+		int			left_fdwxacts = 0;
+
+		foreach(lcell, FdwXactParticipantsForAC)
+		{
+			FdwXactParticipant *fdw_part = lfirst(lcell);
+
+			/*
+			 * Count FdwXact entries that we registered to shared memory array
+			 * in this transaction.
+			 */
+			if (fdw_part->fdw_xact)
+			{
+				/*
+				 * The status of foreign transaction must be either preparing
+				 * or prepared. In any case, since we have registered FdwXact
+				 * entry we leave them to the resolver process. For the preparing
+				 * state, since the foreign transaction might not close yet we
+				 * fall through and call rollback API. For the prepared state,
+				 * since the foreign transaction has closed we don't need to do
+				 * anything.
+				 */
+				Assert(fdw_part->fdw_xact->status == FDW_XACT_PREPARING ||
+					   fdw_part->fdw_xact->status == FDW_XACT_PREPARED);
+
+				left_fdwxacts++;
+				if (fdw_part->fdw_xact->status == FDW_XACT_PREPARED)
+					continue;
+			}
+
+			/*
+			 * Rollback all current foreign transaction. Since we're rollbacking
+			 * the transaction it's too late even if we raise an error here.
+			 * So we log it as warning.
+			 */
+			if (!fdw_part->rollback_foreign_xact(&fdw_part->foreign_xact))
+				ereport(WARNING,
+						(errmsg("could not abort transaction on server \"%s\"",
+								fdw_part->foreign_xact.server->servername)));
+		}
+
+		/* If we left some FdwXact entries, ask the resolver process */
+		if (left_fdwxacts > 0)
+		{
+			/* left_fdwxacts is a signed int, hence %d */
+			ereport(WARNING,
+					(errmsg("left %d foreign transactions in in-doubt status",
+							left_fdwxacts)));
+			fdwxact_maybe_launch_resolver(true);
+		}
+	}
+
+	ForgetAllFdwXactParticipants();
+}
+
+/*
+ * AtPrepare_FdwXacts
+ *
+ * If there are foreign servers involved in the transaction, this function
+ * prepares transactions on those servers.
+ *
+ * Note that it can happen that the transaction aborts after we prepared part
+ * of participants. In this case since we can change to abort we cannot forget
+ * FdwXactParticipantsForAC here. These are processed by the resolver process
+ * during aborting, or at EOXact_FdwXacts.
+ */
+void
+AtPrepare_FdwXacts(void)
+{
+	/* If there are no foreign servers involved, we have no business here */
+	if (FdwXactParticipantsForAC == NIL)
+		return;
+
+	/*
+	 * We cannot prepare distributed transaction if any foreign server of
+	 * participants in the transaction isn't capable of two-phase commit.
+	 */
+	if ((MyXactFlags & XACT_FLAGS_FDWNOPREPARE) != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_T_R_INTEGRITY_CONSTRAINT_VIOLATION),
+				 errmsg("can not prepare the transaction because some foreign servers involved in transaction can not prepare the transaction")));
+
+	/* Prepare transactions on participating foreign servers. */
+	FdwXactPrepareForeignTransactions();
+}
+
+/*
+ * FdwXactResolveForeignTransaction
+ *
+ * Resolve the foreign transaction using the foreign data wrapper's transaction
+ * handler routine. The foreign transaction can be a dangling transaction
+ * that nobody is interested in. If the fate of the foreign transaction is
+ * not determined yet, it's determined according to the status of the
+ * corresponding local transaction.
+ *
+ * If the resolution is successful, remove the foreign transaction entry from
+ * the shared memory and also remove the corresponding on-disk file.
+ *
+ * Returns true on success.  When the FDW routine reports failure this
+ * function raises ERROR, and it also raises ERROR when the fate cannot be
+ * decided because the local transaction is still in progress.
+ */
+static bool
+FdwXactResolveForeignTransaction(FdwXact fdwxact)
+{
+	bool		resolved;
+	bool		is_commit;
+	ForeignServer *fserver;
+	ForeignDataWrapper *fdw;
+	UserMapping *user_mapping;
+	FdwRoutine *fdw_routine;
+	ForeignTransaction foreign_xact;
+
+	Assert(fdwxact);
+
+	/*
+	 * Determine whether we commit or abort this foreign transaction.
+	 */
+	if (fdwxact->status == FDW_XACT_COMMITTING_PREPARED)
+		is_commit = true;
+	else if (fdwxact->status == FDW_XACT_ABORTING_PREPARED)
+		is_commit = false;
+
+	/*
+	 * If the local transaction is already committed, commit prepared
+	 * foreign transaction.
+	 */
+	else if (TransactionIdDidCommit(fdwxact->local_xid))
+	{
+		fdwxact->status = FDW_XACT_COMMITTING_PREPARED;
+		is_commit = true;
+	}
+
+	/*
+	 * If the local transaction is already aborted, abort prepared
+	 * foreign transactions.
+	 */
+	else if (TransactionIdDidAbort(fdwxact->local_xid))
+	{
+		fdwxact->status = FDW_XACT_ABORTING_PREPARED;
+		is_commit = false;
+	}
+
+	/*
+	 * The local transaction is not in progress but the foreign
+	 * transaction is not prepared on the foreign server. This
+	 * can happen when transaction failed after registered this
+	 * entry but before actual preparing on the foreign server.
+	 * So let's assume it aborted.
+	 */
+	else if (!TransactionIdIsInProgress(fdwxact->local_xid))
+		is_commit = false;
+
+	/*
+	 * The Local transaction is in progress and foreign transaction
+	 * state is neither committing or aborting. This should not
+	 * happen because we cannot determine to do commit or abort for
+	 * foreign transaction associated with the in-progress local
+	 * transaction.
+	 */
+	else
+		ereport(ERROR,
+				(errmsg("cannot resolve foreign transaction associated with in-progress transaction %u on server %u",
+						fdwxact->local_xid, fdwxact->serverid)));
+
+	/* Construct foreign server connection information for passing to API */
+	fserver = GetForeignServer(fdwxact->serverid);
+	fdw = GetForeignDataWrapper(fserver->fdwid);
+	user_mapping = GetUserMapping(fdwxact->userid, fdwxact->serverid);
+	fdw_routine = GetFdwRoutine(fdw->fdwhandler);
+	foreign_xact.server = fserver;
+	foreign_xact.usermapping = user_mapping;
+	foreign_xact.fx_id = fdwxact->fdw_xact_id;
+
+	/* Resolve the foreign transaction */
+	Assert(fdw_routine->ResolveForeignTransaction);
+	resolved = fdw_routine->ResolveForeignTransaction(&foreign_xact,
+													  is_commit);
+
+	if (!resolved)
+	{
+		/* Reuse the server looked up above instead of fetching it again */
+		ereport(ERROR,
+				(errmsg("could not %s a prepared foreign transaction on server \"%s\"",
+						is_commit ? "commit" : "rollback", fserver->servername),
+				 errdetail("local transaction id is %u, connected by user id %u",
+						   fdwxact->local_xid, fdwxact->userid)));
+	}
+	else
+	{
+		/* Drop the on-disk state file (if any) and the shared memory entry */
+		LWLockAcquire(FdwXactLock, LW_EXCLUSIVE);
+		if (fdwxact->ondisk)
+			RemoveFdwXactFile(fdwxact->dbid, fdwxact->local_xid,
+							  fdwxact->serverid, fdwxact->userid,
+							  true);
+		remove_fdw_xact(fdwxact);
+		LWLockRelease(FdwXactLock);
+	}
+
+	return resolved;
+}
+
+/*
+ * Return one FdwXact entry that matches to given arguments, otherwise
+ * return NULL. Since this function search FdwXact entry by unique key
+ * all arguments should be valid.
+ */
+static FdwXact
+get_one_fdw_xact(Oid dbid, TransactionId xid, Oid serverid, Oid userid,
+				 bool need_lock)
+{
+	List	   *fdw_xact_list;
+	FdwXact		fdw_xact;
+
+	/* All search conditions must be valid values */
+	Assert(TransactionIdIsValid(xid));
+	Assert(OidIsValid(serverid));
+	Assert(OidIsValid(userid));
+	Assert(OidIsValid(dbid));
+
+	fdw_xact_list = get_fdw_xacts(dbid, xid, serverid, userid, need_lock);
+
+	/* Could not find entry */
+	if (fdw_xact_list == NIL)
+		return NULL;
+
+	/* Must be one entry since we search it by the unique key */
+	Assert(list_length(fdw_xact_list) == 1);
+
+	/* Keep the entry, release the temporary list that carried it */
+	fdw_xact = (FdwXact) linitial(fdw_xact_list);
+	list_free(fdw_xact_list);
+
+	return fdw_xact;
+}
+
+/*
+ * Return true if there is at least one prepared foreign transaction
+ * which matches given arguments.  The search takes FdwXactLock itself.
+ */
+bool
+fdw_xact_exists(Oid dbid, TransactionId xid, Oid serverid, Oid userid)
+{
+	List	   *fdw_xact_list;
+	bool		exists;
+
+	fdw_xact_list = get_fdw_xacts(dbid, xid, serverid, userid, true);
+	exists = (fdw_xact_list != NIL);
+
+	/* Only the answer is needed; don't leak the list (no-op on NIL) */
+	list_free(fdw_xact_list);
+
+	return exists;
+}
+
+/*
+ * Returns an array of all foreign prepared transactions for the user-level
+ * function pg_prepared_fdw_xacts.
+ *
+ * WARNING -- we return even those transactions whose information is not
+ * completely filled yet. The caller should filter them out if he doesn't want them.
+ *
+ * The returned array is palloc'd.
+ */
+static FdwXact
+get_all_fdw_xacts(int *length)
+{
+	List	   *entries;
+	ListCell   *cell;
+	FdwXact		result;
+	int			ncopied = 0;
+
+	Assert(length != NULL);
+
+	/* Invalid values for every key select all entries */
+	entries = get_fdw_xacts(InvalidOid, InvalidTransactionId,
+							InvalidOid, InvalidOid, true);
+
+	/* Nothing registered: report an empty result */
+	if (entries == NIL)
+	{
+		*length = 0;
+		return NULL;
+	}
+
+	result = (FdwXact)
+		palloc(sizeof(FdwXactData) * list_length(entries));
+
+	/* Copy each entry by value into the next slot of the result array */
+	foreach(cell, entries)
+	{
+		FdwXact		src = (FdwXact) lfirst(cell);
+
+		memcpy(&result[ncopied], src, sizeof(FdwXactData));
+		ncopied++;
+	}
+
+	list_free(entries);
+	*length = ncopied;
+
+	return result;
+}
+
+/*
+ * Return a list of FdwXact matched to given arguments. Otherwise return
+ * NIL.
+ *
+ * An invalid value for dbid, xid, serverid or userid means that key is not
+ * filtered on.  FdwXactLock is taken in shared mode only when need_lock is
+ * true.
+ */
+static List*
+get_fdw_xacts(Oid dbid, TransactionId xid, Oid serverid, Oid userid,
+			  bool need_lock)
+{
+	List	   *matched = NIL;
+	int			slot;
+
+	if (need_lock)
+		LWLockAcquire(FdwXactLock, LW_SHARED);
+
+	for (slot = 0; slot < FdwXactCtl->numFdwXacts; slot++)
+	{
+		FdwXact		candidate = FdwXactCtl->fdw_xacts[slot];
+
+		/* Each valid key must agree with the candidate; otherwise skip it */
+		if (xid != InvalidTransactionId && candidate->local_xid != xid)
+			continue;
+		if (OidIsValid(dbid) && candidate->dbid != dbid)
+			continue;
+		if (OidIsValid(serverid) && candidate->serverid != serverid)
+			continue;
+		if (OidIsValid(userid) && candidate->userid != userid)
+			continue;
+
+		matched = lappend(matched, candidate);
+	}
+
+	if (need_lock)
+		LWLockRelease(FdwXactLock);
+
+	return matched;
+}
+
+/*
+ * fdw_xact_redo
+ * Apply the redo log for a foreign transaction.
+ */
+void
+fdw_xact_redo(XLogReaderState *record)
+{
+	char	   *rec = XLogRecGetData(record);
+	uint8		info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+
+	if (info == XLOG_FDW_XACT_INSERT)
+	{
+		/*
+		 * Add fdwxact entry and set start/end lsn of the WAL record
+		 * in FdwXact entry.
+		 */
+		LWLockAcquire(FdwXactLock, LW_EXCLUSIVE);
+		FdwXactRedoAdd(rec,
+					   record->ReadRecPtr,
+					   record->EndRecPtr);
+		LWLockRelease(FdwXactLock);
+	}
+	else if (info == XLOG_FDW_XACT_REMOVE)
+	{
+		/* Named xlrec so that it does not shadow the reader-state parameter */
+		xl_fdw_xact_remove *xlrec = (xl_fdw_xact_remove *) rec;
+
+		/* Delete FdwXact entry and file if exists */
+		LWLockAcquire(FdwXactLock, LW_EXCLUSIVE);
+		FdwXactRedoRemove(xlrec->dbid, xlrec->xid, xlrec->serverid,
+						  xlrec->userid, false);
+		LWLockRelease(FdwXactLock);
+	}
+	else
+		elog(ERROR, "invalid log type %d in foreign transaction log record", info);
+}
+
+/*
+ * Return a null-terminated foreign transaction identifier. Returned string
+ * value is used to identify foreign transaction. The identifier should not
+ * be same as any other concurrent prepared transaction identifier.
+ *
+ * To make the foreign transactionid, we should ideally use something like
+ * UUID, which gives unique ids with high probability, but that may be expensive
+ * here and UUID extension which provides the function to generate UUID is
+ * not part of the core code.
+ *
+ * The identifier has the form "fx_<random>_<serverid>_<userid>" and is
+ * palloc'd with room for FDW_XACT_ID_MAX_LEN bytes.
+ */
+static char *
+generate_fdw_xact_identifier(Oid serverid, Oid userid)
+{
+	char	   *fdw_xact_id;
+
+	fdw_xact_id = (char *) palloc(FDW_XACT_ID_MAX_LEN * sizeof(char));
+
+	/*
+	 * snprintf always NUL-terminates within the given size, so no explicit
+	 * termination is needed.  Oids are unsigned, hence %u.
+	 */
+	snprintf(fdw_xact_id, FDW_XACT_ID_MAX_LEN, "%s_%ld_%u_%u",
+			 "fx", Abs(random()), serverid, userid);
+
+	return fdw_xact_id;
+}
+
+/*
+ * CheckPointFdwXact
+ *
+ * We must fsync the foreign transaction state file that is valid or generated
+ * during redo and has a inserted LSN <= the checkpoint'S redo horizon.
+ * The foreign transaction entries and hence the corresponding files are expected + * to be very short-lived. By executing this function at the end, we might have + * lesser files to fsync, thus reducing some I/O. This is similar to + * CheckPointTwoPhase(). + * + * In order to avoid disk I/O while holding a light weight lock, the function + * first collects the files which need to be synced under FdwXactLock and then + * syncs them after releasing the lock. This approach creates a race condition: + * after releasing the lock, and before syncing a file, the corresponding + * foreign transaction entry and hence the file might get removed. The function + * checks whether that's true and ignores the error if so. + */ +void +CheckPointFdwXacts(XLogRecPtr redo_horizon) +{ + int cnt; + int serialized_fdw_xacts = 0; + + /* Quick get-away, before taking lock */ + if (max_prepared_foreign_xacts <= 0) + return; + + TRACE_POSTGRESQL_FDWXACT_CHECKPOINT_START(); + + LWLockAcquire(FdwXactLock, LW_SHARED); + + /* Another quick, before we allocate memory */ + if (FdwXactCtl->numFdwXacts <= 0) + { + LWLockRelease(FdwXactLock); + return; + } + + /* + * We are expecting there to be zero FdwXact that need to be copied to + * disk, so we perform all I/O while holding FdwXactLock for simplicity. + * This presents any new foreign xacts from preparing while this occurs, + * which shouldn't be a problem since the presence fo long-lived prepared + * foreign xacts indicated the transaction manager isn't active. + * + * It's also possible to move I/O out of the lock, but on every error we + * should check whether somebody committed our transaction in different + * backend. Let's leave this optimisation for future, if somebody will + * spot that this place cause bottleneck. + * + * Note that it isn't possible for there to be a FdwXact with a + * insert_end_lsn set prior to the last checkpoint yet is marked + * invalid, because of the efforts with delayChkpt. 
+ */ + for (cnt = 0; cnt < FdwXactCtl->numFdwXacts; cnt++) + { + FdwXact fxact = FdwXactCtl->fdw_xacts[cnt]; + + if ((fxact->valid || fxact->inredo) && + !fxact->ondisk && + fxact->insert_end_lsn <= redo_horizon) + { + char *buf; + int len; + + XlogReadFdwXactData(fxact->insert_start_lsn, &buf, &len); + RecreateFdwXactFile(fxact->dbid, fxact->local_xid, + fxact->serverid, fxact->userid, + buf, len); + fxact->ondisk = true; + fxact->insert_start_lsn = InvalidXLogRecPtr; + fxact->insert_end_lsn = InvalidXLogRecPtr; + pfree(buf); + serialized_fdw_xacts++; + } + } + + LWLockRelease(FdwXactLock); + + /* + * Flush unconditionally the parent directory to make any information + * durable on disk. FdwXact files could have been removed and those + * removals need to be made persistent as well as any files newly created. + */ + fsync_fname(FDW_XACTS_DIR, true); + + TRACE_POSTGRESQL_FDWXACT_CHECKPOINT_DONE(); + + if (log_checkpoints && serialized_fdw_xacts > 0) + ereport(LOG, + (errmsg_plural("%u foreign transaction state file was written " + "for long-running prepared transactions", + "%u foreign transaction state files were written " + "for long-running prepared transactions", + serialized_fdw_xacts, + serialized_fdw_xacts))); +} + +/* + * Reads foreign transaction data from xlog. During checkpoint this data will + * be moved to fdwxact files and ReadFdwXactFile should be used instead. + * + * Note clearly that this function accesses WAL during normal operation, similarly + * to the way WALSender or Logical Decoding would do. It does not run during + * crash recovery or standby processing. 
+ *
+ * On success *buf points to a palloc'd copy of the record's main data, and
+ * *len (when len is not NULL) receives its length.  Any failure is reported
+ * with ereport(ERROR).
+ */
+static void
+XlogReadFdwXactData(XLogRecPtr lsn, char **buf, int *len)
+{
+	XLogReaderState *reader;
+	XLogRecord *rec;
+	char	   *errormsg;
+	uint32		datalen;
+	bool		is_fdwxact_insert;
+
+	reader = XLogReaderAllocate(wal_segment_size, &read_local_xlog_page, NULL);
+	if (reader == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory"),
+				 errdetail("Failed while allocating an XLog reading processor.")));
+
+	/* Fetch the record that starts at the given LSN */
+	rec = XLogReadRecord(reader, lsn, &errormsg);
+	if (rec == NULL)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not read foreign transaction state from xlog at %X/%X",
+						(uint32) (lsn >> 32),
+						(uint32) lsn)));
+
+	/* It must be an "insert" record of the foreign transaction rmgr */
+	is_fdwxact_insert =
+		XLogRecGetRmid(reader) == RM_FDW_XACT_ID &&
+		(XLogRecGetInfo(reader) & ~XLR_INFO_MASK) == XLOG_FDW_XACT_INSERT;
+	if (!is_fdwxact_insert)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("expected foreign transaction state data is not present in xlog at %X/%X",
+						(uint32) (lsn >> 32),
+						(uint32) lsn)));
+
+	/* Hand a private copy of the payload back to the caller */
+	datalen = XLogRecGetDataLen(reader);
+	if (len != NULL)
+		*len = datalen;
+
+	*buf = palloc(sizeof(char) * datalen);
+	memcpy(*buf, XLogRecGetData(reader), sizeof(char) * datalen);
+
+	XLogReaderFree(reader);
+}
+
+/*
+ * Recreates a foreign transaction state file. This is used in WAL replay
+ * and during checkpoint creation.
+ *
+ * Note: content and len don't include CRC.
+ */ +void +RecreateFdwXactFile(Oid dbid, TransactionId xid, Oid serverid, + Oid userid, void *content, int len) +{ + char path[MAXPGPATH]; + pg_crc32c statefile_crc; + int fd; + + /* Recompute CRC */ + INIT_CRC32C(statefile_crc); + COMP_CRC32C(statefile_crc, content, len); + FIN_CRC32C(statefile_crc); + + FdwXactFilePath(path, dbid, xid, serverid, userid); + + fd = OpenTransientFile(path, O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY); + + if (fd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not recreate foreign transaction state file \"%s\": %m", + path))); + + /* Write content and CRC */ + pgstat_report_wait_start(WAIT_EVENT_FDW_XACT_FILE_WRITE); + if (write(fd, content, len) != len) + { + pgstat_report_wait_end(); + CloseTransientFile(fd); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write foreign transcation state file: %m"))); + } + if (write(fd, &statefile_crc, sizeof(pg_crc32c)) != sizeof(pg_crc32c)) + { + pgstat_report_wait_end(); + CloseTransientFile(fd); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write foreign transcation state file: %m"))); + } + pgstat_report_wait_end(); + + /* + * We must fsync the file because the end-of-replay checkpoint will not do + * so, there being no FDWXACT in shared memory yet to tell it to. + */ + pgstat_report_wait_start(WAIT_EVENT_FDW_XACT_FILE_SYNC); + if (pg_fsync(fd) != 0) + { + pgstat_report_wait_end(); + CloseTransientFile(fd); + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fsync foreign transaction state file: %m"))); + } + pgstat_report_wait_end(); + + if (CloseTransientFile(fd) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close foreign transaction file: %m"))); +} + +/* + * ProcessFdwXactBuffer + * + * Given a transaction id, userid and serverid read it either from disk + * or read it directly via shmem xlog record pointer using the provided + * "insert_start_lsn". 
+ */ +static char * +ProcessFdwXactBuffer(Oid dbid, TransactionId xid, Oid serverid, + Oid userid, XLogRecPtr insert_start_lsn, bool fromdisk) +{ + TransactionId origNextXid = ShmemVariableCache->nextXid; + char *buf; + + Assert(LWLockHeldByMeInMode(FdwXactLock, LW_EXCLUSIVE)); + + if (!fromdisk) + Assert(insert_start_lsn != InvalidXLogRecPtr); + + if (TransactionIdFollowsOrEquals(xid, origNextXid)) + { + if (fromdisk) + { + ereport(WARNING, + (errmsg("removing future fdwxact state file for xid %u, server %u and user %u", + xid, serverid, userid))); + RemoveFdwXactFile(dbid, xid, serverid, userid, true); + } + else + { + ereport(WARNING, + (errmsg("removing future fdwxact state from memory for xid %u, server %u and user %u", + xid, serverid, userid))); + FdwXactRedoRemove(dbid, xid, serverid, userid, true); + } + return NULL; + } + + if (fromdisk) + { + buf = ReadFdwXactFile(dbid, xid, serverid, userid, true); + if (buf == NULL) + { + ereport(WARNING, + (errmsg("removing corrupt fdwxact state file for xid %u, server %u and user %u", + xid, serverid, userid))); + RemoveFdwXactFile(dbid, xid, serverid, userid, true); + return NULL; + } + } + else + { + /* Read xlog data */ + XlogReadFdwXactData(insert_start_lsn, &buf, NULL); + } + + return buf; +} + +/* + * Read and validate the foreign transaction state file. + * + * If it looks OK (has a valid magic number and CRC), return thecontents in + * a structure allocated in-memory. Otherwise return NULL. The structure can + * be later freed by the caller. 
+ */ +static char * +ReadFdwXactFile(Oid dbid, TransactionId xid, Oid serverid, Oid userid, + bool give_warnings) +{ + char path[MAXPGPATH]; + int fd; + FdwXactOnDiskData *fxact_file_data; + struct stat stat; + uint32 crc_offset; + pg_crc32c calc_crc; + pg_crc32c file_crc; + char *buf; + + FdwXactFilePath(path, dbid, xid, serverid, userid); + + fd = OpenTransientFile(path, O_RDONLY | PG_BINARY); + if (fd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open FDW transaction state file \"%s\": %m", + path))); + + /* + * Check file length. We can determine a lower bound pretty easily. We + * set an upper bound to avoid palloc() failure on a corrupt file, though + * we can't guarantee that we won't get an out of memory error anyway, + * even on a valid file. + */ + if (fstat(fd, &stat)) + { + CloseTransientFile(fd); + if (give_warnings) + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not stat FDW transaction state file \"%s\": %m", + path))); + return NULL; + } + + if (stat.st_size < (offsetof(FdwXactOnDiskData, fdw_xact_id) + + sizeof(pg_crc32c)) || + stat.st_size > MaxAllocSize) + { + CloseTransientFile(fd); + ereport(WARNING, + (errcode_for_file_access(), + errmsg("too large FDW transaction state file \"%s\": %m", + path))); + return NULL; + } + + crc_offset = stat.st_size - sizeof(pg_crc32c); + if (crc_offset != MAXALIGN(crc_offset)) + { + CloseTransientFile(fd); + return NULL; + } + + /* + * Ok, slurp in the file. 
+ */ + buf = (char *) palloc(stat.st_size); + fxact_file_data = (FdwXactOnDiskData *) buf; + + /* Slurp the file */ + pgstat_report_wait_start(WAIT_EVENT_FDW_XACT_FILE_READ); + if (read(fd, buf, stat.st_size) != stat.st_size) + { + pgstat_report_wait_end(); + CloseTransientFile(fd); + if (give_warnings) + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not read FDW transaction state file \"%s\": %m", + path))); + return NULL; + } + + pgstat_report_wait_end(); + CloseTransientFile(fd); + + /* + * Check the CRC. + */ + INIT_CRC32C(calc_crc); + COMP_CRC32C(calc_crc, buf, crc_offset); + FIN_CRC32C(calc_crc); + + file_crc = *((pg_crc32c *) (buf + crc_offset)); + + if (!EQ_CRC32C(calc_crc, file_crc)) + { + pfree(buf); + return NULL; + } + + /* Check if the contents is an expected data */ + fxact_file_data = (FdwXactOnDiskData *) buf; + if (fxact_file_data->dbid != dbid || + fxact_file_data->serverid != serverid || + fxact_file_data->userid != userid || + fxact_file_data->local_xid != xid) + { + ereport(WARNING, + (errmsg("invalid foreign transaction state file \"%s\"", + path))); + CloseTransientFile(fd); + pfree(buf); + return NULL; + } + + return buf; +} + +/* + * PrescanFdwXacts + * + * Scan the all foreign transactions directory for oldest active transaction. + * This is run during database startup, after we completed reading WAL. + * ShmemVariableCache->nextXid has been set to one more than the highest XID + * for which evidence exists in WAL. 
+ *
+ * Returns the smaller of the given oldestActiveXid and the oldest local xid
+ * found among the state files.  Files whose xid is not older than nextXid
+ * are removed with a warning.
+ */
+TransactionId
+PrescanFdwXacts(TransactionId oldestActiveXid)
+{
+	TransactionId next_xid = ShmemVariableCache->nextXid;
+	TransactionId oldest = oldestActiveXid;
+	DIR		   *dir;
+	struct dirent *entry;
+
+	dir = AllocateDir(FDW_XACTS_DIR);
+	while ((entry = ReadDir(dir, FDW_XACTS_DIR)) != NULL)
+	{
+		const char *fname = entry->d_name;
+		Oid			dbid;
+		Oid			serverid;
+		Oid			userid;
+		TransactionId local_xid;
+
+		/* Skip anything that does not look like a state file name */
+		if (strlen(fname) != FDW_XACT_FILE_NAME_LEN ||
+			strspn(fname, "0123456789ABCDEF_") != FDW_XACT_FILE_NAME_LEN)
+			continue;
+
+		/* The name encodes the identity of the foreign transaction */
+		sscanf(fname, "%08x_%08x_%08x_%08x",
+			   &dbid, &local_xid, &serverid, &userid);
+
+		if (TransactionIdFollowsOrEquals(local_xid, next_xid))
+		{
+			/*
+			 * Remove a foreign prepared transaction file corresponding to an
+			 * XID, which is too new.
+			 */
+			ereport(WARNING,
+					(errmsg("removing future foreign prepared transaction file \"%s\"",
+							fname)));
+			RemoveFdwXactFile(dbid, local_xid, serverid, userid, true);
+		}
+		else if (TransactionIdPrecedesOrEquals(local_xid, oldest))
+			oldest = local_xid;
+	}
+
+	FreeDir(dir);
+	return oldest;
+}
+
+/*
+ * restoreFdwXactData
+ *
+ * Scan pg_fdw_xact and fill FdwXact depending on the on-disk data.
+ * This is called once at the beginning of recovery, saving any extra
+ * lookups in the future.  FdwXact files that are newer than the
+ * minimum XID horizon are discarded on the way.
+ */ +void +restoreFdwXactData(void) +{ + DIR *cldir; + struct dirent *clde; + + LWLockAcquire(FdwXactLock, LW_EXCLUSIVE); + cldir = AllocateDir(FDW_XACTS_DIR); + while ((clde = ReadDir(cldir, FDW_XACTS_DIR)) != NULL) + { + if (strlen(clde->d_name) == FDW_XACT_FILE_NAME_LEN && + strspn(clde->d_name, "0123456789ABCDEF_") == FDW_XACT_FILE_NAME_LEN) + { + TransactionId local_xid; + Oid dbid; + Oid serverid; + Oid userid; + char *buf; + + sscanf(clde->d_name, "%08x_%08x_%08x_%08x", + &dbid, &local_xid, &serverid, &userid); + + /* Read fdwxact data from disk */ + buf = ProcessFdwXactBuffer(dbid, local_xid, serverid, userid, + InvalidXLogRecPtr, true); + + if (buf == NULL) + continue; + + /* Add this entry into the table of foreign transactions */ + FdwXactRedoAdd(buf, InvalidXLogRecPtr, InvalidXLogRecPtr); + } + } + + LWLockRelease(FdwXactLock); + FreeDir(cldir); +} + +/* + * Remove the foreign transaction file for given entry. + * + * If giveWarning is false, do not complain about file-not-present; + * this is an expected case during WAL replay. + */ +static void +RemoveFdwXactFile(Oid dbid, TransactionId xid, Oid serverid, Oid userid, bool giveWarning) +{ + char path[MAXPGPATH]; + + FdwXactFilePath(path, dbid, xid, serverid, userid); + if (unlink(path) < 0 && (errno != ENOENT || giveWarning)) + ereport(WARNING, + (errcode_for_file_access(), + errmsg("could not remove foreign transaction state file \"%s\": %m", + path))); +} + +/* + * FdwXactRedoAdd + * + * Store pointer to the start/end of the WAL record along with the xid in + * a fdwxact entry in shared memory FdwXactData structure. + */ +static void +FdwXactRedoAdd(char *buf, XLogRecPtr start_lsn, XLogRecPtr end_lsn) +{ + FdwXactOnDiskData *fxact_data = (FdwXactOnDiskData *) buf; + FdwXact fxact; + + Assert(LWLockHeldByMeInMode(FdwXactLock, LW_EXCLUSIVE)); + Assert(RecoveryInProgress()); + + /* + * Add this entry into the table of foreign transactions. 
The + * status of the transaction is set as preparing, since we do not + * know the exact status right now. Resolver will set it later + * based on the status of local transaction which prepared this + * foreign transaction. + */ + fxact = insert_fdw_xact(fxact_data->dbid, fxact_data->local_xid, + fxact_data->serverid, fxact_data->userid, + fxact_data->fdw_xact_id); + + /* + * Set status as preparing, since we do not know the xact status + * right now. Resolver will set it later based on the status of + * local transaction that prepared this fdwxact entry. + */ + fxact->status = FDW_XACT_PREPARING; + fxact->insert_start_lsn = start_lsn; + fxact->insert_end_lsn = end_lsn; + fxact->inredo = true; /* added in redo */ + fxact->valid = false; + fxact->ondisk = XLogRecPtrIsInvalid(start_lsn); +} + +/* + * FdwXactRedoRemove + * + * Remove the corresponding fdw_xact entry from FdwXactCtl. + * Also remove fdw_xact file if a foreign transaction was saved + * via an earlier checkpoint. + */ +void +FdwXactRedoRemove(Oid dbid, TransactionId xid, Oid serverid, + Oid userid, bool givewarning) +{ + FdwXact fdwxact; + + Assert(LWLockHeldByMeInMode(FdwXactLock, LW_EXCLUSIVE)); + Assert(RecoveryInProgress()); + + fdwxact = get_one_fdw_xact(dbid, xid, serverid, userid, + false); + + if (fdwxact == NULL) + return; + + /* Clean up entry and any files we may have left */ + if (fdwxact->ondisk) + RemoveFdwXactFile(fdwxact->dbid, fdwxact->local_xid, + fdwxact->serverid, fdwxact->userid, + givewarning); + remove_fdw_xact(fdwxact); +} + +/* + * Scan the shared memory entries of FdwXact and valid them. + * + * This is run at the end of recovery, but before we allow backends to write + * WAL. 
+ */ +void +RecoverFdwXacts(void) +{ + int i; + + LWLockAcquire(FdwXactLock, LW_EXCLUSIVE); + for (i = 0; i < FdwXactCtl->numFdwXacts; i++) + { + FdwXact fdwxact = FdwXactCtl->fdw_xacts[i]; + char *buf; + + buf = ProcessFdwXactBuffer(fdwxact->dbid, fdwxact->local_xid, + fdwxact->serverid, fdwxact->userid, + fdwxact->insert_start_lsn, fdwxact->ondisk); + + if (buf == NULL) + continue; + + ereport(LOG, + (errmsg("recovering foreign transaction %u for server %u and user %u from shared memory", + fdwxact->local_xid, fdwxact->serverid, fdwxact->userid))); + + fdwxact->inredo = false; + fdwxact->valid = true; + } + LWLockRelease(FdwXactLock); +} + +bool +check_foreign_twophase_commit(bool *newval, void **extra, GucSource source) +{ + /* Parameter check */ + if (*newval && + (max_prepared_foreign_xacts == 0 || max_foreign_xact_resolvers == 0)) + { + GUC_check_errdetail("Cannot enable \"foreign_twophase_commit\" when " + "\"max_prepared_foreign_xacts\" or \"max_foreign_xact_resolvers\"" + "is zero value"); + return false; + } + + return true; +} + +/* Built in functions */ +/* + * Structure to hold and iterate over the foreign transactions to be displayed + * by the built-in functions. 
+ */ +typedef struct +{ + FdwXact fdw_xacts; + int num_xacts; + int cur_xact; +} WorkingStatus; + +Datum +pg_prepared_fdw_xacts(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + WorkingStatus *status; + char *xact_status; + + if (SRF_IS_FIRSTCALL()) + { + TupleDesc tupdesc; + MemoryContext oldcontext; + int num_fdw_xacts = 0; + + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * Switch to memory context appropriate for multiple function calls + */ + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* build tupdesc for result tuples */ + /* this had better match pg_fdw_xacts view in system_views.sql */ + tupdesc = CreateTemplateTupleDesc(6, false); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "dbid", + OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "transaction", + XIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "serverid", + OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 4, "userid", + OIDOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 5, "status", + TEXTOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 6, "identifier", + TEXTOID, -1, 0); + + funcctx->tuple_desc = BlessTupleDesc(tupdesc); + + /* + * Collect status information that we will format and send out as a + * result set. + */ + status = (WorkingStatus *) palloc(sizeof(WorkingStatus)); + funcctx->user_fctx = (void *) status; + + status->fdw_xacts = get_all_fdw_xacts(&num_fdw_xacts); + status->num_xacts = num_fdw_xacts; + status->cur_xact = 0; + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + status = funcctx->user_fctx; + + while (status->cur_xact < status->num_xacts) + { + FdwXact fdw_xact = &status->fdw_xacts[status->cur_xact++]; + Datum values[6]; + bool nulls[6]; + HeapTuple tuple; + Datum result; + + if (!fdw_xact->valid) + continue; + + /* + * Form tuple with appropriate data. 
+ */ + MemSet(values, 0, sizeof(values)); + MemSet(nulls, 0, sizeof(nulls)); + + values[0] = ObjectIdGetDatum(fdw_xact->dbid); + values[1] = TransactionIdGetDatum(fdw_xact->local_xid); + values[2] = ObjectIdGetDatum(fdw_xact->serverid); + values[3] = ObjectIdGetDatum(fdw_xact->userid); + switch (fdw_xact->status) + { + case FDW_XACT_PREPARING: + xact_status = "prepared"; + break; + case FDW_XACT_COMMITTING_PREPARED: + xact_status = "committing"; + break; + case FDW_XACT_ABORTING_PREPARED: + xact_status = "aborting"; + break; + default: + xact_status = "unknown"; + break; + } + values[4] = CStringGetTextDatum(xact_status); + /* should this be really interpreted by FDW */ + values[5] = PointerGetDatum(cstring_to_text_with_len(fdw_xact->fdw_xact_id, + strlen(fdw_xact->fdw_xact_id))); + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + result = HeapTupleGetDatum(tuple); + SRF_RETURN_NEXT(funcctx, result); + } + + SRF_RETURN_DONE(funcctx); +} + +/* + * Built-in function to resolve a prepared foreign transaction manually. + */ +Datum +pg_resolve_fdw_xact(PG_FUNCTION_ARGS) +{ + TransactionId xid = DatumGetTransactionId(PG_GETARG_DATUM(0)); + Oid serverid = PG_GETARG_OID(1); + Oid userid = PG_GETARG_OID(2); + FdwXact fdwxact; + bool ret; + + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to resolve foreign transactions")))); + + fdwxact = get_one_fdw_xact(MyDatabaseId, xid, serverid, userid, true); + if (fdwxact == NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + (errmsg("could not find foreign transaction entry")))); + + ret = FdwXactResolveForeignTransaction(fdwxact); + + PG_RETURN_BOOL(ret); +} + +/* + * Built-in function to remove a prepared foreign transaction entry without + * resolution. 
+ * The function gives a way to forget about such a prepared
+ * transaction in cases such as: the foreign server where it is prepared is
+ * no longer available, or the user which prepared this transaction needs to
+ * be dropped.
+ *
+ * Arguments are the local transaction id, the foreign server OID and the
+ * user OID; the entry is looked up in the current database (MyDatabaseId).
+ * Only superusers may call this.  An error is raised when no matching entry
+ * exists.
+ *
+ * NOTE(review): whether remove_fdw_xact() also removes the on-disk state file
+ * and WAL-logs the removal is not visible from here -- confirm.
+ */
+Datum
+pg_remove_fdw_xact(PG_FUNCTION_ARGS)
+{
+	TransactionId xid = DatumGetTransactionId(PG_GETARG_DATUM(0));
+	Oid			serverid = PG_GETARG_OID(1);
+	Oid			userid = PG_GETARG_OID(2);
+	FdwXact		fdwxact;
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser to remove foreign transactions"))));
+
+	LWLockAcquire(FdwXactLock, LW_EXCLUSIVE);	/* held across lookup and removal */
+
+	fdwxact = get_one_fdw_xact(MyDatabaseId, xid, serverid, userid, false);	/* false: lock already held */
+	if (fdwxact == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 (errmsg("could not find foreign transaction entry"))));
+
+	remove_fdw_xact(fdwxact);
+
+	LWLockRelease(FdwXactLock);
+
+	PG_RETURN_VOID();
+}
diff --git a/src/backend/foreign/fdwxact_launcher.c b/src/backend/foreign/fdwxact_launcher.c
new file mode 100644
index 0000000..6782c33
--- /dev/null
+++ b/src/backend/foreign/fdwxact_launcher.c
@@ -0,0 +1,587 @@
+/*-------------------------------------------------------------------------
+ *
+ * fdwxact_launcher.c
+ *
+ * The foreign transaction resolver launcher process starts foreign
+ * transaction resolver processes. The launcher schedules a resolver
+ * process to be started when a request arrives from a backend process.
+ *
+ * There is a shared memory area where the information of resolver processes
+ * is stored. A backend process requests the start of a new resolver process
+ * via that shared memory area. Note that the launcher is assuming
+ * that there is no more than one starting request for a database.
+ * + * Portions Copyright (c) 2018, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/foreign/fdwxact_launcher.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "funcapi.h" +#include "pgstat.h" +#include "funcapi.h" + +#include "foreign/fdwxact.h" +#include "foreign/fdwxact_launcher.h" +#include "foreign/fdwxact_resolver.h" +#include "foreign/resolver_internal.h" +#include "postmaster/bgworker.h" +#include "storage/ipc.h" +#include "storage/proc.h" +#include "tcop/tcopprot.h" + +#define DEFAULT_NAPTIME_PER_CYCLE 180000L + +static void fdwxact_launcher_onexit(int code, Datum arg); +static void fdwxact_launcher_sighup(SIGNAL_ARGS); +static void fdwxact_launch_resolver(Oid dbid, int slot); +static bool fdwxact_relaunch_resolvers(void); + +static volatile sig_atomic_t got_SIGHUP = false; +FdwXactResolver *MyFdwXactResolver = NULL; + +Datum pg_stat_get_fdwxact_resolver(PG_FUNCTION_ARGS); + +/* + * Wake up the launcher process. + */ +void +FdwXactLauncherWakeup(void) +{ + if (FdwXactRslvCtl->launcher_pid != InvalidPid) + kill(FdwXactRslvCtl->launcher_pid, SIGUSR1); +} + +/* Report shared memory space needed by FdwXactRsoverShmemInit */ +Size +FdwXactRslvShmemSize(void) +{ + Size size = 0; + + size = add_size(size, mul_size(max_foreign_xact_resolvers, + sizeof(FdwXactResolver))); + + return size; +} + +/* + * Allocate and initialize foreign transaction resolver shared + * memory. 
+ */ +void +FdwXactRslvShmemInit(void) +{ + bool found; + + FdwXactRslvCtl = ShmemInitStruct("Foreign transactions resolvers", + FdwXactRslvShmemSize(), + &found); + + if (!IsUnderPostmaster) + { + int slot; + + /* First time through, so initialize */ + MemSet(FdwXactRslvCtl, 0, FdwXactRslvShmemSize()); + + SHMQueueInit(&(FdwXactRslvCtl->FdwXactQueue)); + + for (slot = 0; slot < max_foreign_xact_resolvers; slot++) + { + FdwXactResolver *resolver = &FdwXactRslvCtl->resolvers[slot]; + + resolver->pid = InvalidPid; + resolver->dbid = InvalidOid; + resolver->in_use = false; + SpinLockInit(&(resolver->mutex)); + } + } +} + +/* + * Cleanup function for fdwxact launcher + * + * Called on fdwxact launcher exit. + */ +static void +fdwxact_launcher_onexit(int code, Datum arg) +{ + FdwXactRslvCtl->launcher_pid = InvalidPid; +} + +/* SIGHUP: set flag to reload configuration at next convenient time */ +static void +fdwxact_launcher_sighup(SIGNAL_ARGS) +{ + int save_errno = errno; + + got_SIGHUP = true; + + SetLatch(MyLatch); + + errno = save_errno; +} + +/* + * Main loop for the fdwxact launcher process. + */ +void +FdwXactLauncherMain(Datum main_arg) +{ + TimestampTz last_start_time = 0; + + ereport(DEBUG1, + (errmsg("fdwxact resolver launcher started"))); + + before_shmem_exit(fdwxact_launcher_onexit, (Datum) 0); + + Assert(FdwXactRslvCtl->launcher_pid == 0); + FdwXactRslvCtl->launcher_pid = MyProcPid; + + pqsignal(SIGHUP, fdwxact_launcher_sighup); + pqsignal(SIGTERM, die); + BackgroundWorkerUnblockSignals(); + + BackgroundWorkerInitializeConnection(NULL, NULL, 0); + + /* Enter main loop */ + for (;;) + { + TimestampTz now; + long wait_time = DEFAULT_NAPTIME_PER_CYCLE; + int rc; + + CHECK_FOR_INTERRUPTS(); + + now = GetCurrentTimestamp(); + + if (TimestampDifferenceExceeds(last_start_time, now, + foreign_xact_resolution_retry_interval)) + { + bool launched; + + /* + * Launch foreign transaction resolvers that are requested + * but not running. 
+ */ + launched = fdwxact_relaunch_resolvers(); + if (launched) + last_start_time = now; + } + else + { + /* + * The wint in previous cycle was interrupted in less than + * foreign_xact_resolution_retry_interval since last resolver + * started, this usually means crash of the resolver, so we + * should retry in foreign_xact_resolution_retry_interval again. + */ + wait_time = foreign_xact_resolution_retry_interval; + } + + /* Wait for more work */ + rc = WaitLatch(MyLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + wait_time, + WAIT_EVENT_FDW_XACT_LAUNCHER_MAIN); + + if (rc & WL_POSTMASTER_DEATH) + proc_exit(1); + + if (rc & WL_LATCH_SET) + { + ResetLatch(MyLatch); + CHECK_FOR_INTERRUPTS(); + } + + if (got_SIGHUP) + { + got_SIGHUP = false; + ProcessConfigFile(PGC_SIGHUP); + } + } + + /* Not reachable */ +} + +/* + * Request launcher to launch a new foreign transaction resolver worker + * if not running yet. A foreign transaction resolver worker is responsible + * for resolution of foreign transaction that are registered on a database. + * So if a resolver worker already is launched, we don't need to launch new + * one. + */ +void +fdwxact_maybe_launch_resolver(bool ignore_error) +{ + FdwXactResolver *resolver; + bool found = false; + int i; + + /* + * Looking for a resolver process that is running and working on the + * same database. + */ + LWLockAcquire(FdwXactResolverLock, LW_SHARED); + for (i = 0; i < max_foreign_xact_resolvers; i++) + { + resolver = &FdwXactRslvCtl->resolvers[i]; + + if (resolver->in_use && + resolver->pid != InvalidPid && + resolver->dbid == MyDatabaseId) + { + found = true; + break; + } + } + LWLockRelease(FdwXactResolverLock); + + /* + * If we found the resolver for my database, we don't need to launch new + * one but wake running worker up. 
+ */ + if (found) + { + SetLatch(resolver->latch); + + elog(DEBUG1, "found a running foreign transaction resolver process for database %u", + MyDatabaseId); + + return; + } + + /* Looking for unused worker slot */ + LWLockAcquire(FdwXactResolverLock, LW_EXCLUSIVE); + for (i = 0; i < max_foreign_xact_resolvers; i++) + { + resolver = &FdwXactRslvCtl->resolvers[i]; + + if (!resolver->in_use) + { + found = true; + break; + } + } + + /* + * However if there are no more free worker slots, inform user about it before + * exiting. + */ + if (!found) + { + LWLockRelease(FdwXactResolverLock); + + ereport(ERROR, + (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), + errmsg("out of foreign trasanction resolver slots"), + errhint("You might need to increase max_foreign_transaction_resolvers."))); + return; + } + + Assert(resolver->pid == InvalidPid); + + /* Found a new resolver process */ + resolver->dbid = MyDatabaseId; + resolver->in_use = true; + + LWLockRelease(FdwXactResolverLock); + + /* Wake up launcher */ + FdwXactLauncherWakeup(); +} + +/* + * Launch a foreign transaction resolver process that will connect to given + * 'dbid' at 'slot' if given. If slot is negative value we find an unused slot. + * Note that caller must hold FdwXactResolverLock in exclusive mode. 
+ */
+static void
+fdwxact_launch_resolver(Oid dbid, int slot)
+{
+	BackgroundWorker bgw;
+	BackgroundWorkerHandle *bgw_handle;
+	FdwXactResolver *resolver;
+	int			launch_slot = slot;
+
+	/*
+	 * If the slot number is invalid, we find an unused slot.  Scan every
+	 * slot before settling on one: a resolver for this database may sit
+	 * behind the first free slot, and we must not start a second one.
+	 */
+	if (launch_slot < 0)
+	{
+		int			i;
+
+		for (i = 0; i < max_foreign_xact_resolvers; i++)
+		{
+			FdwXactResolver *candidate = &FdwXactRslvCtl->resolvers[i];
+
+			if (candidate->in_use)
+			{
+				/* The database already has a resolver, nothing to do */
+				if (candidate->dbid == dbid)
+					return;
+			}
+			else if (launch_slot < 0)
+				launch_slot = i;	/* remember the first free slot */
+		}
+	}
+
+	/* No unused slot found */
+	if (launch_slot < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+				 errmsg("out of foreign transaction resolver slots"),
+				 errhint("You might need to increase max_foreign_transaction_resolvers.")));
+
+	/* Reserve the slot for this database before registering the worker */
+	resolver = &FdwXactRslvCtl->resolvers[launch_slot];
+	resolver->in_use = true;
+	resolver->dbid = dbid;
+
+	/* Register the new dynamic worker */
+	memset(&bgw, 0, sizeof(bgw));
+	bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
+		BGWORKER_BACKEND_DATABASE_CONNECTION;
+	bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
+	snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
+	snprintf(bgw.bgw_function_name, BGW_MAXLEN, "FdwXactResolverMain");
+	snprintf(bgw.bgw_name, BGW_MAXLEN,
+			 "foreign transaction resolver for database %u", resolver->dbid);
+	snprintf(bgw.bgw_type, BGW_MAXLEN, "foreign transaction resolver");
+	bgw.bgw_restart_time = BGW_NEVER_RESTART;
+	bgw.bgw_main_arg = Int32GetDatum(launch_slot);
+	bgw.bgw_notify_pid = (Datum) 0;
+
+	if (!RegisterDynamicBackgroundWorker(&bgw, &bgw_handle))
+	{
+		/*
+		 * Failed to launch, cleanup the worker slot.  Take the mutex of the
+		 * slot itself: MyFdwXactResolver is set only in a resolver process
+		 * once it has attached, so it must not be dereferenced here.  Clear
+		 * dbid as well, otherwise fdwxact_relaunch_resolvers() keeps
+		 * treating this database as one that already has a resolver.
+		 */
+		SpinLockAcquire(&(resolver->mutex));
+		resolver->in_use = false;
+		resolver->dbid = InvalidOid;
+		SpinLockRelease(&(resolver->mutex));
+
+		ereport(WARNING,
+				(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
+				 errmsg("out of background worker slots"),
+				 errhint("You might need to increase max_worker_processes.")));
+	}
+
+	/*
+	 * We don't need to wait until it attaches here because we're going to wait
+	 * until all foreign transactions are resolved.
+	 */
+}
+
+/*
+ * Launch all foreign transaction resolvers that are required by backend process
+ * but not running.
+ *
+ * Returns true if at least one launch was attempted.
+ */
+static bool
+fdwxact_relaunch_resolvers(void)
+{
+	int			i, j;
+	int			num_launches = 0;
+	int			num_unused_slots = 0;
+	int			num_dbs = 0;
+	bool		launched = false;
+	Oid		   *dbs_to_launch;
+	Oid		   *dbs_having_worker = palloc0(sizeof(Oid) * max_foreign_xact_resolvers);
+
+	/*
+	 * Launch resolver workers on the databases that are requested
+	 * by backend processes.
+	 */
+	LWLockAcquire(FdwXactResolverLock, LW_EXCLUSIVE);
+	for (i = 0; i < max_foreign_xact_resolvers; i++)
+	{
+		FdwXactResolver *resolver = &FdwXactRslvCtl->resolvers[i];
+
+		/* Remember unused worker slots */
+		if (!resolver->in_use)
+			num_unused_slots++;
+
+		/* Remember databases that are having a resolve worker */
+		if (OidIsValid(resolver->dbid))
+			dbs_having_worker[num_dbs++] = resolver->dbid;
+
+		/* Launch new foreign transaction resolver worker on the database */
+		if (resolver->in_use &&
+			OidIsValid(resolver->dbid) &&
+			resolver->pid == InvalidPid)
+		{
+			fdwxact_launch_resolver(resolver->dbid, i);
+			launched = true;
+		}
+	}
+	LWLockRelease(FdwXactResolverLock);
+
+	/* There is no unused slot, exit */
+	if (num_unused_slots == 0)
+	{
+		pfree(dbs_having_worker);
+		return launched;
+	}
+
+	dbs_to_launch = (Oid *) palloc(sizeof(Oid) * num_unused_slots);
+
+	/*
+	 * If there is unused slot, we can launch foreign transaction resolver
+	 * on databases that has unresolved foreign transaction but doesn't
+	 * have any resolver. This usually happens when resolvers crash for
+	 * whatever reason. Scanning all FdwXact entries could take time but
+	 * since this is a relaunch case it's not harmful.
+	 */
+	LWLockAcquire(FdwXactLock, LW_SHARED);
+	for (i = 0; i < FdwXactCtl->numFdwXacts; i++)
+	{
+		FdwXact		fdw_xact = FdwXactCtl->fdw_xacts[i];
+		bool		found = false;
+
+		/* dbs_to_launch holds num_unused_slots entries; stop once full */
+		if (num_launches >= num_unused_slots)
+			break;
+
+		/* Skip databases that already have a resolver ... */
+		for (j = 0; j < num_dbs && !found; j++)
+			found = (dbs_having_worker[j] == fdw_xact->dbid);
+
+		/* ... and databases already queued for launch in this pass */
+		for (j = 0; j < num_launches && !found; j++)
+			found = (dbs_to_launch[j] == fdw_xact->dbid);
+
+		if (found)
+			continue;
+
+		dbs_to_launch[num_launches++] = fdw_xact->dbid;
+	}
+	LWLockRelease(FdwXactLock);
+
+	/* Launch resolver process for a database at any worker slot */
+	LWLockAcquire(FdwXactResolverLock, LW_EXCLUSIVE);
+	for (i = 0; i < num_launches; i++)
+	{
+		fdwxact_launch_resolver(dbs_to_launch[i], -1);
+		launched = true;
+	}
+	LWLockRelease(FdwXactResolverLock);
+
+	pfree(dbs_to_launch);
+	pfree(dbs_having_worker);
+
+	return launched;
+}
+
+/*
+ * FdwXactLauncherRegister
+ *		Register a background worker running the foreign transaction
+ *		launcher.
+ */
+void
+FdwXactLauncherRegister(void)
+{
+	BackgroundWorker bgw;
+
+	/* Nothing to register when no resolver may ever be started */
+	if (max_foreign_xact_resolvers == 0)
+		return;
+
+	memset(&bgw, 0, sizeof(bgw));
+	bgw.bgw_flags = BGWORKER_SHMEM_ACCESS |
+		BGWORKER_BACKEND_DATABASE_CONNECTION;
+	bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
+	snprintf(bgw.bgw_library_name, BGW_MAXLEN, "postgres");
+	snprintf(bgw.bgw_function_name, BGW_MAXLEN, "FdwXactLauncherMain");
+	snprintf(bgw.bgw_name, BGW_MAXLEN,
+			 "foreign transaction launcher");
+	snprintf(bgw.bgw_type, BGW_MAXLEN,
+			 "foreign transaction launcher");
+	bgw.bgw_restart_time = 5;
+	bgw.bgw_notify_pid = 0;
+	bgw.bgw_main_arg = (Datum) 0;
+
+	RegisterBackgroundWorker(&bgw);
+}
+
+/* Is the current process the foreign transaction launcher? */
+bool
+IsFdwXactLauncher(void)
+{
+	return FdwXactRslvCtl->launcher_pid == MyProcPid;
+}
+
+
+/*
+ * Returns activity of foreign transaction resolvers, including pids, the
+ * database each resolver works on and the last resolution time.
+ */
+Datum
+pg_stat_get_fdwxact_resolver(PG_FUNCTION_ARGS)
+{
+#define PG_STAT_GET_FDWXACT_RESOLVERS_COLS 3
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	TupleDesc	tupdesc;
+	Tuplestorestate *tupstore;
+	MemoryContext per_query_ctx;
+	MemoryContext oldcontext;
+	int			i;
+
+	/* check to see if caller supports us returning a tuplestore */
+	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("set-valued function called in context that cannot accept a set")));
+	if (!(rsinfo->allowedModes & SFRM_Materialize))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("materialize mode required, but it is not " \
+						"allowed in this context")));
+
+	/* Build a tuple descriptor for our result type */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	/* The tuplestore must live in the per-query context to outlive this call */
+	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+	oldcontext = MemoryContextSwitchTo(per_query_ctx);
+
+	tupstore = tuplestore_begin_heap(true, false, work_mem);
+	rsinfo->returnMode = SFRM_Materialize;
+	rsinfo->setResult = tupstore;
+	rsinfo->setDesc = tupdesc;
+
+	MemoryContextSwitchTo(oldcontext);
+
+	/* Emit one row (pid, dbid, last_resolved_time) per running resolver */
+	for (i = 0; i < max_foreign_xact_resolvers; i++)
+	{
+		FdwXactResolver *resolver = &FdwXactRslvCtl->resolvers[i];
+		pid_t		pid;
+		Oid			dbid;
+		TimestampTz last_resolved_time;
+		Datum		values[PG_STAT_GET_FDWXACT_RESOLVERS_COLS];
+		bool		nulls[PG_STAT_GET_FDWXACT_RESOLVERS_COLS];
+
+		/*
+		 * Read the slot under its own mutex.  MyFdwXactResolver is set only
+		 * in resolver processes, so a backend calling this function must not
+		 * dereference it.
+		 */
+		SpinLockAcquire(&(resolver->mutex));
+
+		/*
+		 * Skip slots without a live resolver: pid is either still 0 or was
+		 * set to InvalidPid when the resolver detached.
+		 */
+		if (resolver->pid == 0 || resolver->pid == InvalidPid)
+		{
+			SpinLockRelease(&(resolver->mutex));
+			continue;
+		}
+
+		pid = resolver->pid;
+		dbid = resolver->dbid;
+		last_resolved_time = resolver->last_resolved_time;
+		SpinLockRelease(&(resolver->mutex));
+
+		memset(nulls, 0, sizeof(nulls));
+		/* pid */
+		values[0] = Int32GetDatum(pid);
+
+		/* dbid */
+		values[1] = ObjectIdGetDatum(dbid);
+
+		/* last_resolved_time */
+		if (last_resolved_time == 0)
+			nulls[2] = true;
+		else
+			values[2] = TimestampTzGetDatum(last_resolved_time);
+
+		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+	}
+
+	/* clean up and return the tuplestore */
+	tuplestore_donestoring(tupstore);
+
+	return (Datum) 0;
+}
diff --git a/src/backend/foreign/fdwxact_resolver.c b/src/backend/foreign/fdwxact_resolver.c
new file mode 100644
index 0000000..7f7ff8f
--- /dev/null
+++ b/src/backend/foreign/fdwxact_resolver.c
@@ -0,0 +1,310 @@
+/*-------------------------------------------------------------------------
+ *
+ * fdwxact_resolver.c
+ *
+ * The foreign transaction resolver background worker resolves foreign
+ * transactions that participate in a distributed transaction. A resolver
+ * process is started by the foreign transaction launcher for each database.
+ *
+ * A resolver process keeps resolving foreign transactions on its database.
+ * It resolves two types of foreign transactions: on-line foreign transactions
+ * and dangling foreign transactions. An on-line foreign transaction is a
+ * foreign transaction whose resolution a concurrent backend process is
+ * waiting for. A dangling transaction is a foreign transaction whose
+ * distributed transaction ended up in an in-doubt state. A resolver process
+ * doesn't exit as long as there is at least one unresolved foreign transaction
+ * on the database, even if the timeout has been reached.
+ *
+ * Normal termination is by SIGTERM, which instructs the resolver process
+ * to exit(0) at the next convenient moment. Emergency termination is by
+ * SIGQUIT, like any backend. The resolver process also terminates on timeout,
+ * but only if there are no pending foreign transactions on the database
+ * waiting to be resolved.
+ *
+ * Portions Copyright (c) 2018, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/foreign/fdwxact_resolver.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <signal.h>
+#include <unistd.h>
+
+#include "access/transam.h"
+#include "access/xact.h"
+#include "commands/dbcommands.h"
+#include "foreign/fdwxact.h"
+#include "foreign/fdwxact_resolver.h"
+#include "foreign/fdwxact_launcher.h"
+#include "foreign/resolver_internal.h"
+#include "funcapi.h"
+#include "libpq/libpq.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "postmaster/bgworker.h"
+#include "storage/ipc.h"
+#include "tcop/tcopprot.h"
+#include "utils/builtins.h"
+#include "utils/timeout.h"
+#include "utils/timestamp.h"
+
+/* GUC parameters */
+int foreign_xact_resolution_retry_interval;
+int foreign_xact_resolver_timeout = 60 * 1000;
+
+/* Control data holding the array of resolver slots */
+FdwXactRslvCtlData *FdwXactRslvCtl;
+
+static void FdwXactRslvLoop(void);
+static long FdwXactRslvComputeSleepTime(TimestampTz now);
+static void FdwXactRslvCheckTimeout(TimestampTz now);
+
+static void fdwxact_resolver_sighup(SIGNAL_ARGS);
+static void fdwxact_resolver_onexit(int code, Datum arg);
+static void fdwxact_resolver_detach(void);
+static void fdwxact_resolver_attach(int slot);
+
+/* Flags set by signal handlers */
+static volatile sig_atomic_t got_SIGHUP = false;
+
+/*
+ * SIGHUP handler: set a flag to reload configuration at the next convenient
+ * time and set our latch so the main loop notices it.  errno is saved and
+ * restored around the handler body.
+ */
+static void
+fdwxact_resolver_sighup(SIGNAL_ARGS)
+{
+	int		save_errno = errno;
+
+	got_SIGHUP = true;
+
+	SetLatch(MyLatch);
+
+	errno = save_errno;
+}
+
+/*
+ * Detach the resolver and cleanup the resolver info.
+ */
+static void
+fdwxact_resolver_detach(void)
+{
+	/* Block concurrent access */
+	LWLockAcquire(FdwXactResolverLock, LW_EXCLUSIVE);
+
+	/* Mark the slot as free: no process, not in use, no database */
+	MyFdwXactResolver->pid = InvalidPid;
+	MyFdwXactResolver->in_use = false;
+	MyFdwXactResolver->dbid = InvalidOid;
+
+	LWLockRelease(FdwXactResolverLock);
+}
+
+/*
+ * Clean up foreign transaction resolver info at process exit: release our
+ * slot, then wake up the launcher.
+ */
+static void
+fdwxact_resolver_onexit(int code, Datum arg)
+{
+	fdwxact_resolver_detach();
+	FdwXactLauncherWakeup();
+}
+
+/*
+ * Attach to a slot.
+ *
+ * Points MyFdwXactResolver at the given slot, records our pid and latch in
+ * it, and registers the exit callback that releases the slot again.  Raises
+ * an error if the slot is not marked as in use.
+ */
+static void
+fdwxact_resolver_attach(int slot)
+{
+	/* Block concurrent access */
+	LWLockAcquire(FdwXactResolverLock, LW_EXCLUSIVE);
+
+	Assert(slot >= 0 && slot < max_foreign_xact_resolvers);
+	MyFdwXactResolver = &FdwXactRslvCtl->resolvers[slot];
+
+	/* Release the lock ourselves before reporting the error */
+	if (!MyFdwXactResolver->in_use)
+	{
+		LWLockRelease(FdwXactResolverLock);
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("foreign transaction resolver slot %d is empty, cannot attach",
+						slot)));
+	}
+
+	MyFdwXactResolver->pid = MyProcPid;
+	MyFdwXactResolver->latch = &MyProc->procLatch;
+	TIMESTAMP_NOBEGIN(MyFdwXactResolver->last_resolved_time);
+
+	before_shmem_exit(fdwxact_resolver_onexit, (Datum) 0);
+
+	LWLockRelease(FdwXactResolverLock);
+}
+
+/*
+ * Foreign transaction resolver entry point.
+ *
+ * main_arg carries the index of the resolver slot to attach to.
+ */
+void
+FdwXactResolverMain(Datum main_arg)
+{
+	int slot = DatumGetInt32(main_arg);
+
+	/* Attach to a slot */
+	fdwxact_resolver_attach(slot);
+
+	/* Establish signal handlers */
+	pqsignal(SIGHUP, fdwxact_resolver_sighup);
+	pqsignal(SIGTERM, die);
+	BackgroundWorkerUnblockSignals();
+
+	/* Connect to our database */
+	BackgroundWorkerInitializeConnectionByOid(MyFdwXactResolver->dbid, InvalidOid, 0);
+
+	StartTransactionCommand();
+
+	ereport(LOG,
+			(errmsg("foreign transaction resolver for database \"%s\" has started",
+					get_database_name(MyFdwXactResolver->dbid))));
+
+	CommitTransactionCommand();
+
+	/* Initialize stats to a sanish value */
+	MyFdwXactResolver->last_resolved_time = GetCurrentTimestamp();
+
+	/* Run the main loop */
+	FdwXactRslvLoop();
+
+	proc_exit(0);
+}
+
+/*
+ * Fdwxact resolver main loop
+ *
+ * Each iteration tries to resolve one distributed transaction.  When nothing
+ * was resolved, it resolves dangling transactions and then sleeps on the
+ * latch until the next retry time.
+ */
+static void
+FdwXactRslvLoop(void)
+{
+	FdwXactResolveState *fstate;
+
+	/* Create an FdwXactResolveState */
+	fstate = CreateFdwXactResolveState();
+
+	/* Enter main loop */
+	for (;;)
+	{
+		int			rc;
+		TimestampTz	now;
+		long		sleep_time;
+		bool		resolved;
+
+		ResetLatch(MyLatch);
+
+		CHECK_FOR_INTERRUPTS();
+
+		/* Reload the configuration if SIGHUP arrived since the last cycle */
+		if (got_SIGHUP)
+		{
+			got_SIGHUP = false;
+			ProcessConfigFile(PGC_SIGHUP);
+		}
+
+		/* Resolve a distributed transaction */
+		StartTransactionCommand();
+		resolved = FdwXactResolveDistributedTransaction(fstate);
+		CommitTransactionCommand();
+
+		now = GetCurrentTimestamp();
+
+		/* Update my state */
+		if (resolved)
+			MyFdwXactResolver->last_resolved_time = now;
+
+		/* Check for fdwxact resolver timeout */
+		FdwXactRslvCheckTimeout(now);
+
+		/*
+		 * If we have resolved a distributed transaction, go straight to the
+		 * next cycle without resolving dangling transactions or sleeping,
+		 * because there might be other on-line transactions waiting to be
+		 * resolved.
+		 */
+		if (!resolved)
+		{
+			/* Resolve dangling transactions as much as possible */
+			StartTransactionCommand();
+			FdwXactResolveAllDanglingTransactions(MyDatabaseId);
+			CommitTransactionCommand();
+
+			sleep_time = FdwXactRslvComputeSleepTime(now);
+
+			rc = WaitLatch(MyLatch,
+						   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+						   sleep_time,
+						   WAIT_EVENT_FDW_XACT_RESOLVER_MAIN);
+
+			/* Exit immediately if the postmaster died */
+			if (rc & WL_POSTMASTER_DEATH)
+				proc_exit(1);
+		}
+	}
+}
+
+/*
+ * Check whether there have been foreign transactions by the backend within
+ * foreign_xact_resolver_timeout and shutdown if not.
+ */
+static void
+FdwXactRslvCheckTimeout(TimestampTz now)
+{
+	TimestampTz timeout;
+
+	/* A timeout of zero disables the check */
+	if (foreign_xact_resolver_timeout == 0)
+		return;
+
+	timeout = TimestampTzPlusMilliseconds(MyFdwXactResolver->last_resolved_time,
+										  foreign_xact_resolver_timeout);
+
+	if (now < timeout)
+		return;
+
+	/*
+	 * The timeout has been reached. We exit only if there are neither pending
+	 * on-line transactions nor dangling transactions left on our database.
+	 */
+	if (!fdw_xact_exists(InvalidTransactionId, MyDatabaseId, InvalidOid,
+						 InvalidOid))
+	{
+		StartTransactionCommand();
+		ereport(LOG,
+				(errmsg("foreign transaction resolver for database \"%s\" will stop because the timeout",
+						get_database_name(MyFdwXactResolver->dbid))));
+		CommitTransactionCommand();
+
+		fdwxact_resolver_detach();
+		proc_exit(0);
+	}
+}
+
+/*
+ * Compute how long we should sleep until the next cycle and return it in
+ * milliseconds.
+ *
+ * The next wakeup time is remembered across calls.  Once it has passed, it is
+ * moved to foreign_xact_resolution_retry_interval milliseconds after "now".
+ */
+static long
+FdwXactRslvComputeSleepTime(TimestampTz now)
+{
+	static TimestampTz	wakeuptime = 0;
+	long	sleeptime;
+	long	sec_to_timeout;
+	int		microsec_to_timeout;
+
+	if (now >= wakeuptime)
+		wakeuptime = TimestampTzPlusMilliseconds(now,
+												 foreign_xact_resolution_retry_interval);
+
+	/* Compute relative time until wakeup. */
+	TimestampDifference(now, wakeuptime,
+						&sec_to_timeout, &microsec_to_timeout);
+
+	sleeptime = sec_to_timeout * 1000 + microsec_to_timeout / 1000;
+
+	return sleeptime;
+}
+
+/* Is the current process attached to a resolver slot? */
+bool
+IsFdwXactResolver(void)
+{
+	return MyFdwXactResolver != NULL;
+}
diff --git a/src/backend/foreign/foreign.c b/src/backend/foreign/foreign.c
index eac78a5..1873a24 100644
--- a/src/backend/foreign/foreign.c
+++ b/src/backend/foreign/foreign.c
@@ -155,6 +155,49 @@ GetForeignServerByName(const char *srvname, bool missing_ok)
 	return GetForeignServer(serverid);
 }
 
+/*
+ * GetUserMappingByOid - look up the user mapping by user mapping oid.
+ *
+ * If userid of the mapping is invalid, we set it to current userid.
+ */
+UserMapping *
+GetUserMappingByOid(Oid umid)
+{
+	Datum		datum;
+	HeapTuple	tp;
+	UserMapping	*um;
+	bool		isnull;
+	Form_pg_user_mapping tableform;
+
+	tp = SearchSysCache1(USERMAPPINGOID,
+						 ObjectIdGetDatum(umid));
+
+	/* Oids are unsigned, so report them with %u */
+	if (!HeapTupleIsValid(tp))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("user mapping not found for %u", umid)));
+
+	tableform = (Form_pg_user_mapping) GETSTRUCT(tp);
+	um = (UserMapping *) palloc(sizeof(UserMapping));
+	um->umid = umid;
+	um->userid = OidIsValid(tableform->umuser) ?
+		tableform->umuser : GetUserId();
+	um->serverid = tableform->umserver;
+
+	/* Extract the umoptions, using the cache the tuple was fetched from */
+	datum = SysCacheGetAttr(USERMAPPINGOID,
+							tp,
+							Anum_pg_user_mapping_umoptions,
+							&isnull);
+	if (isnull)
+		um->options = NIL;
+	else
+		um->options = untransformRelOptions(datum);
+
+	ReleaseSysCache(tp);
+
+	return um;
+}
 
 /*
  * GetUserMapping - look up the user mapping.
diff --git a/src/backend/postmaster/bgworker.c b/src/backend/postmaster/bgworker.c
index d2b695e..9243686 100644
--- a/src/backend/postmaster/bgworker.c
+++ b/src/backend/postmaster/bgworker.c
@@ -16,6 +16,8 @@
 
 #include "libpq/pqsignal.h"
 #include "access/parallel.h"
+#include "foreign/fdwxact_resolver.h"
+#include "foreign/fdwxact_launcher.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "port/atomics.h"
@@ -129,6 +131,12 @@ static const struct
 	},
 	{
 		"ApplyWorkerMain", ApplyWorkerMain
+	},
+	{
+		"FdwXactResolverMain", FdwXactResolverMain
+	},
+	{
+		"FdwXactLauncherMain", FdwXactLauncherMain
 	}
 };
 
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 8a5b2b3..a67b34d 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3492,6 +3492,12 @@ pgstat_get_wait_activity(WaitEventActivity w)
 		case WAIT_EVENT_CHECKPOINTER_MAIN:
 			event_name = "CheckpointerMain";
 			break;
+		case WAIT_EVENT_FDW_XACT_RESOLVER_MAIN:
+			event_name = "FdwXactResolverMain";
+			break;
+		case WAIT_EVENT_FDW_XACT_LAUNCHER_MAIN:
+			event_name = "FdwXactLauncherMain";
+			break;
 		case WAIT_EVENT_LOGICAL_LAUNCHER_MAIN:
 			event_name = "LogicalLauncherMain";
 			break;
@@ -3683,6 +3689,9 @@ pgstat_get_wait_ipc(WaitEventIPC w)
 		case WAIT_EVENT_SYNC_REP:
 			event_name = "SyncRep";
 			break;
+		case WAIT_EVENT_FDW_XACT_RESOLUTION:
+			event_name = "FdwXactResolution";
+			break;
 
 			/* no default case, so that compiler will warn */
 	}
@@ -3898,6 +3907,15 @@ pgstat_get_wait_io(WaitEventIO w)
 		case WAIT_EVENT_TWOPHASE_FILE_WRITE:
 			event_name = "TwophaseFileWrite";
 			break;
+		case WAIT_EVENT_FDW_XACT_FILE_WRITE:
+			event_name = "FdwXactFileWrite";
+			break;
+		case WAIT_EVENT_FDW_XACT_FILE_READ:
+			event_name = "FdwXactFileRead";
+			break;
+		case WAIT_EVENT_FDW_XACT_FILE_SYNC:
+			event_name = "FdwXactFileSync";
+			break;
 		case WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ:
 			event_name = "WALSenderTimelineHistoryRead";
 			break;
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 41de140..138dae4 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -100,6 +100,8 @@
 #include "common/file_perm.h"
 #include "common/ip.h"
 #include "common/string.h"
+#include "foreign/fdwxact_resolver.h"
+#include "foreign/fdwxact_launcher.h"
 #include "lib/ilist.h"
 #include "libpq/auth.h"
 #include "libpq/libpq.h"
@@ -905,6 +907,10 @@ PostmasterMain(int argc, char *argv[])
 		ereport(ERROR,
 				(errmsg("WAL streaming (max_wal_senders > 0) requires wal_level \"replica\" or \"logical\"")));
 
+	if (max_prepared_foreign_xacts > 0 && max_foreign_xact_resolvers == 0)
+		ereport(ERROR,
+				(errmsg("preparing foreign transactions (max_prepared_foreign_transactions > 0) requires max_foreign_transaction_resolvers > 0")));
+
 	/*
 	 * Other one-time internal sanity checks can go here, if they are fast.
 	 * (Put any slow processing further down, after postmaster.pid creation.)
@@ -980,12 +986,13 @@ PostmasterMain(int argc, char *argv[])
 #endif
 
 	/*
-	 * Register the apply launcher.
Since it registers a background worker, - * it needs to be called before InitializeMaxBackends(), and it's probably - * a good idea to call it before any modules had chance to take the - * background worker slots. + * Register the apply launcher and foreign transaction launcher. Since + * it registers a background worker, it needs to be called before + * InitializeMaxBackends(), and it's probably a good idea to call it + * before any modules had chance to take the background worker slots. */ ApplyLauncherRegister(); + FdwXactLauncherRegister(); /* * process any libraries that should be preloaded at postmaster start diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index afb4972..960fd6a 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -154,6 +154,7 @@ LogicalDecodingProcessRecord(LogicalDecodingContext *ctx, XLogReaderState *recor case RM_COMMIT_TS_ID: case RM_REPLORIGIN_ID: case RM_GENERIC_ID: + case RM_FDW_XACT_ID: /* just deal with xid, and done */ ReorderBufferProcessXid(ctx->reorder, XLogRecGetXid(record), buf.origptr); diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 0c86a58..5f321fe 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -22,6 +22,7 @@ #include "access/subtrans.h" #include "access/twophase.h" #include "commands/async.h" +#include "foreign/fdwxact_launcher.h" #include "miscadmin.h" #include "pgstat.h" #include "postmaster/autovacuum.h" @@ -150,6 +151,8 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) size = add_size(size, SyncScanShmemSize()); size = add_size(size, AsyncShmemSize()); size = add_size(size, BackendRandomShmemSize()); + size = add_size(size, FdwXactShmemSize()); + size = add_size(size, FdwXactRslvShmemSize()); #ifdef EXEC_BACKEND size = add_size(size, ShmemBackendArraySize()); #endif @@ -270,6 +273,8 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, 
int port) SyncScanShmemInit(); AsyncShmemInit(); BackendRandomShmemInit(); + FdwXactShmemInit(); + FdwXactRslvShmemInit(); #ifdef EXEC_BACKEND diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index bf2f4db..461ba5c 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -90,6 +90,8 @@ typedef struct ProcArrayStruct TransactionId replication_slot_xmin; /* oldest catalog xmin of any replication slot */ TransactionId replication_slot_catalog_xmin; + /* local transaction id of oldest unresolved distributed transaction */ + TransactionId fdwxact_unresolved_xmin; /* indexes into allPgXact[], has PROCARRAY_MAXPROCS entries */ int pgprocnos[FLEXIBLE_ARRAY_MEMBER]; @@ -245,6 +247,7 @@ CreateSharedProcArray(void) procArray->lastOverflowedXid = InvalidTransactionId; procArray->replication_slot_xmin = InvalidTransactionId; procArray->replication_slot_catalog_xmin = InvalidTransactionId; + procArray->fdwxact_unresolved_xmin = InvalidTransactionId; } allProcs = ProcGlobal->allProcs; @@ -1312,6 +1315,7 @@ GetOldestXmin(Relation rel, int flags) volatile TransactionId replication_slot_xmin = InvalidTransactionId; volatile TransactionId replication_slot_catalog_xmin = InvalidTransactionId; + volatile TransactionId fdwxact_unresolved_xmin = InvalidTransactionId; /* * If we're not computing a relation specific limit, or if a shared @@ -1373,6 +1377,7 @@ GetOldestXmin(Relation rel, int flags) /* fetch into volatile var while ProcArrayLock is held */ replication_slot_xmin = procArray->replication_slot_xmin; replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin; + fdwxact_unresolved_xmin = procArray->fdwxact_unresolved_xmin; if (RecoveryInProgress()) { @@ -1423,6 +1428,15 @@ GetOldestXmin(Relation rel, int flags) result = replication_slot_xmin; /* + * Check whether there are unresolved distributed transaction + * requiring an older xmin. 
+	 */
+	if (!(flags & PROCARRAY_FDW_XACT_XMIN) &&
+		TransactionIdIsValid(fdwxact_unresolved_xmin) &&
+		NormalTransactionIdPrecedes(fdwxact_unresolved_xmin, result))
+		result = fdwxact_unresolved_xmin;
+
+	/*
 	 * After locks have been released and defer_cleanup_age has been applied,
 	 * check whether we need to back up further to make logical decoding
 	 * possible. We need to do so if we're computing the global limit (rel =
@@ -2999,6 +3013,38 @@ ProcArrayGetReplicationSlotXmin(TransactionId *xmin,
 	LWLockRelease(ProcArrayLock);
 }
 
+/*
+ * ProcArraySetFdwXactUnresolvedXmin
+ *
+ * Install a limit on future computations of the xmin horizon so that
+ * vacuum does not remove clog of transactions that are still needed for
+ * resolving distributed transactions.
+ */
+void
+ProcArraySetFdwXactUnresolvedXmin(TransactionId xmin)
+{
+
+	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+	procArray->fdwxact_unresolved_xmin = xmin;
+	LWLockRelease(ProcArrayLock);
+}
+
+/*
+ * ProcArrayGetFdwXactUnresolvedXmin
+ *
+ * Return the current unresolved xmin limit.
+ */
+TransactionId
+ProcArrayGetFdwXactUnresolvedXmin(void)
+{
+	TransactionId xmin;
+
+	LWLockAcquire(ProcArrayLock, LW_SHARED);
+	xmin = procArray->fdwxact_unresolved_xmin;
+	LWLockRelease(ProcArrayLock);
+
+	return xmin;
+}
 
 #define XidCacheRemove(i) \
 	do { \
diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt
index e6025ec..a42d06e 100644
--- a/src/backend/storage/lmgr/lwlocknames.txt
+++ b/src/backend/storage/lmgr/lwlocknames.txt
@@ -50,3 +50,5 @@ OldSnapshotTimeMapLock				42
 BackendRandomLock					43
 LogicalRepWorkerLock				44
 CLogTruncationLock					45
+FdwXactLock					46
+FdwXactResolverLock				47
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 6f9aaa5..ec09515 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -38,6 +38,7 @@
 #include "access/transam.h"
 #include "access/twophase.h"
 #include "access/xact.h"
+#include "foreign/fdwxact.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
@@ -398,6 +399,10 @@ InitProcess(void)
 	MyProc->syncRepState = SYNC_REP_NOT_WAITING;
 	SHMQueueElemInit(&(MyProc->syncRepLinks));
 
+	/* initialize fields for fdw xact */
+	MyProc->fdwXactState = FDW_XACT_NOT_WAITING;
+	SHMQueueElemInit(&(MyProc->fdwXactLinks));
+
 	/* Initialize fields for group XID clearing.
*/ MyProc->procArrayGroupMember = false; MyProc->procArrayGroupMemberXid = InvalidTransactionId; @@ -799,6 +804,9 @@ ProcKill(int code, Datum arg) /* Make sure we're out of the sync rep lists */ SyncRepCleanupAtProcExit(); + /* Make sure we're out of the fdwxact lists */ + FdwXactCleanupAtProcExit(); + #ifdef USE_ASSERT_CHECKING { int i; diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index e4c6e3d..f09955f 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -43,6 +43,8 @@ #include "commands/async.h" #include "commands/prepare.h" #include "executor/spi.h" +#include "foreign/fdwxact_resolver.h" +#include "foreign/fdwxact_launcher.h" #include "jit/jit.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" @@ -2971,6 +2973,18 @@ ProcessInterrupts(void) */ proc_exit(1); } + else if (IsFdwXactResolver()) + ereport(FATAL, + (errcode(ERRCODE_ADMIN_SHUTDOWN), + errmsg("terminating foreign transaction resolver due to administrator command"))); + else if (IsFdwXactLauncher()) + { + /* + * The foreign transaction launcher can be stopped at any time. + * Use exit status 1 so the background worker is restarted. 
+ */ + proc_exit(1); + } else if (RecoveryConflictPending && RecoveryConflictRetryable) { pgstat_report_recovery_conflict(RecoveryConflictReason); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 0bec391..121d7bf 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -42,6 +42,7 @@ #include "commands/variable.h" #include "commands/trigger.h" #include "common/string.h" +#include "foreign/fdwxact.h" #include "funcapi.h" #include "jit/jit.h" #include "libpq/auth.h" @@ -660,6 +661,10 @@ const char *const config_group_names[] = gettext_noop("Client Connection Defaults / Other Defaults"), /* LOCK_MANAGEMENT */ gettext_noop("Lock Management"), + /* FDWXACT */ + gettext_noop("Foreign Transaction Management"), + /* FDWXACT_SETTINGS */ + gettext_noop("Foreign Transaction Management / Settings"), /* COMPAT_OPTIONS */ gettext_noop("Version and Platform Compatibility"), /* COMPAT_OPTIONS_PREVIOUS */ @@ -1832,6 +1837,16 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"foreign_twophase_commit", PGC_USERSET, FDWXACT_SETTINGS, + gettext_noop("Sets the usage of two-phase commit protocol for distributed transaction."), + NULL + }, + &foreign_twophase_commit, + false, + check_foreign_twophase_commit, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL @@ -2236,6 +2251,52 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + /* + * See also CheckRequiredParameterValues() if this parameter changes + */ + { + {"max_prepared_foreign_transactions", PGC_POSTMASTER, RESOURCES_MEM, + gettext_noop("Sets the maximum number of simultaneously prepared transactions on foreign servers."), + NULL + }, + &max_prepared_foreign_xacts, + 0, 0, INT_MAX, + NULL, NULL, NULL + }, + + { + {"foreign_transaction_resolver_timeout", PGC_SIGHUP, RESOURCES_ASYNCHRONOUS, + gettext_noop("Sets the maximum time to wait for foreign transaction resolution."), + NULL, + 
GUC_UNIT_MS + }, + &foreign_xact_resolver_timeout, + 60 * 1000, 0, INT_MAX, + NULL, NULL, NULL + }, + + { + {"max_foreign_transaction_resolvers", PGC_POSTMASTER, RESOURCES_MEM, + gettext_noop("Maximum number of foreign transaction resolution processes."), + NULL + }, + &max_foreign_xact_resolvers, + 0, 0, INT_MAX, + NULL, NULL, NULL + }, + + { + {"foreign_transaction_resolution_retry_interval", PGC_SIGHUP, RESOURCES_ASYNCHRONOUS, + gettext_noop("Sets the time to wait before retrying to resolve foreign transaction " + "after a failed attempt."), + NULL, + GUC_UNIT_MS + }, + &foreign_xact_resolution_retry_interval, + 5000, 1, INT_MAX, + NULL, NULL, NULL + }, + #ifdef LOCK_DEBUG { {"trace_lock_oidmin", PGC_SUSET, DEVELOPER_OPTIONS, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 4e61bc6..88cdc85 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -121,6 +121,8 @@ #temp_buffers = 8MB # min 800kB #max_prepared_transactions = 0 # zero disables the feature # (change requires restart) +#max_prepared_foreign_transactions = 0 # zero disables the feature + # (change requires restart) # Caution: it is not advisable to set max_prepared_transactions nonzero unless # you actively intend to use prepared transactions. 
#work_mem = 4MB # min 64kB @@ -287,6 +289,20 @@ #------------------------------------------------------------------------------ +# FOREIGN TRANSACTION +#------------------------------------------------------------------------------ + +#foreign_twophase_commit = off + +#max_foreign_transaction_resolvers = 0 # max number of resolver process + # (change requires restart) +#foreign_transaction_resolver_timeout = 60s # in milliseconds; 0 disables +#foreign_transaction_resolution_retry_interval = 5s # time to wait before + # retrying to resolve + # foreign transactions + # after a failed attempt + +#------------------------------------------------------------------------------ # QUERY TUNING #------------------------------------------------------------------------------ diff --git a/src/backend/utils/probes.d b/src/backend/utils/probes.d index ad06e8e..ca3eb62 100644 --- a/src/backend/utils/probes.d +++ b/src/backend/utils/probes.d @@ -81,6 +81,8 @@ provider postgresql { probe multixact__checkpoint__done(bool); probe twophase__checkpoint__start(); probe twophase__checkpoint__done(); + probe fdwxact__checkpoint__start(); + probe fdwxact__checkpoint__done(); probe smgr__md__read__start(ForkNumber, BlockNumber, Oid, Oid, Oid, int); probe smgr__md__read__done(ForkNumber, BlockNumber, Oid, Oid, Oid, int, int, int); diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index ab5cb7f..609578c 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -209,6 +209,7 @@ static const char *const subdirs[] = { "pg_snapshots", "pg_subtrans", "pg_twophase", + "pg_fdw_xact", "pg_multixact", "pg_multixact/members", "pg_multixact/offsets", diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 895a51f..5f0683d 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -306,6 +306,8 @@ main(int argc, char *argv[]) ControlFile->max_worker_processes); printf(_("max_prepared_xacts 
setting: %d\n"), ControlFile->max_prepared_xacts); + printf(_("max_prepared_foreign_xacts setting: %d\n"), + ControlFile->max_prepared_foreign_xacts); printf(_("max_locks_per_xact setting: %d\n"), ControlFile->max_locks_per_xact); printf(_("track_commit_timestamp setting: %s\n"), diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index 6fb403a..6d867c8 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -730,6 +730,7 @@ GuessControlValues(void) ControlFile.MaxConnections = 100; ControlFile.max_worker_processes = 8; ControlFile.max_prepared_xacts = 0; + ControlFile.max_prepared_foreign_xacts = 0; ControlFile.max_locks_per_xact = 64; ControlFile.maxAlign = MAXIMUM_ALIGNOF; @@ -957,6 +958,7 @@ RewriteControlFile(void) ControlFile.MaxConnections = 100; ControlFile.max_worker_processes = 8; ControlFile.max_prepared_xacts = 0; + ControlFile.max_prepared_foreign_xacts = 0; ControlFile.max_locks_per_xact = 64; /* Contents are protected with a CRC */ diff --git a/src/bin/pg_waldump/rmgrdesc.c b/src/bin/pg_waldump/rmgrdesc.c index 852d8ca..15bfeb4 100644 --- a/src/bin/pg_waldump/rmgrdesc.c +++ b/src/bin/pg_waldump/rmgrdesc.c @@ -26,6 +26,7 @@ #include "commands/dbcommands_xlog.h" #include "commands/sequence.h" #include "commands/tablespace.h" +#include "foreign/fdwxact_xlog.h" #include "replication/message.h" #include "replication/origin.h" #include "rmgrdesc.h" diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h index 0bbe9879..c15dff7 100644 --- a/src/include/access/rmgrlist.h +++ b/src/include/access/rmgrlist.h @@ -47,3 +47,4 @@ PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_i PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL, NULL) PG_RMGR(RM_GENERIC_ID, "Generic", generic_redo, generic_desc, generic_identify, NULL, NULL, generic_mask) PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", 
logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL, NULL) +PG_RMGR(RM_FDW_XACT_ID, "Foreign Transactions", fdw_xact_redo, fdw_xact_desc, fdw_xact_identify, NULL, NULL, NULL) diff --git a/src/include/access/twophase.h b/src/include/access/twophase.h index 0e932da..b199c88 100644 --- a/src/include/access/twophase.h +++ b/src/include/access/twophase.h @@ -36,6 +36,7 @@ extern void PostPrepare_Twophase(void); extern PGPROC *TwoPhaseGetDummyProc(TransactionId xid); extern BackendId TwoPhaseGetDummyBackendId(TransactionId xid); +extern bool TwoPhaseExists(TransactionId xid); extern GlobalTransaction MarkAsPreparing(TransactionId xid, const char *gid, TimestampTz prepared_at, diff --git a/src/include/access/xact.h b/src/include/access/xact.h index c7b4144..7180bd1 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -105,6 +105,13 @@ extern int MyXactFlags; #define XACT_FLAGS_WROTENONTEMPREL (1U << 2) /* + * XACT_FLAGS_FDWNOPREPARE - set when we wrote data on foreign table of which + * server isn't capable of two-phase commit + * relation. 
+ */ +#define XACT_FLAGS_FDWNOPREPARE (1U << 3) + +/* * start- and end-of-transaction callbacks for dynamically loaded modules */ typedef enum diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 30610b3..795e85a 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -227,6 +227,7 @@ typedef struct xl_parameter_change int MaxConnections; int max_worker_processes; int max_prepared_xacts; + int max_prepared_foreign_xacts; int max_locks_per_xact; int wal_level; bool wal_log_hints; diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 773d9e6..3d5333a 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -178,6 +178,7 @@ typedef struct ControlFileData int MaxConnections; int max_worker_processes; int max_prepared_xacts; + int max_prepared_foreign_xacts; int max_locks_per_xact; bool track_commit_timestamp; diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 8e4145f..21e5bcc 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -5199,6 +5199,13 @@ proargmodes => '{i,o,o,o,o,o,o,o,o}', proargnames => '{subid,subid,relid,pid,received_lsn,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time}', prosrc => 'pg_stat_get_subscription' }, +{ oid => '6053', descr => 'statistics: information about foreign transaction resolver', + proname => 'pg_stat_get_fdwxact_resolver', proisstrict => 'f', provolatile => 's', + proparallel => 'r', prorettype => 'record', proargtypes => '', + proallargtypes => '{oid,oid,oid,timestamptz}', + proargmodes => '{o,o,o,o}', + proargnames => '{pid,dbid,n_entries,last_resolved_time}', + prosrc => 'pg_stat_get_fdwxact_resolver' }, { oid => '2026', descr => 'statistics: current backend PID', proname => 'pg_backend_pid', provolatile => 's', proparallel => 'r', prorettype => 'int4', proargtypes => '', prosrc => 'pg_backend_pid' }, @@ 
-5910,6 +5917,22 @@ proargnames => '{type,object_names,object_args,classid,objid,objsubid}', prosrc => 'pg_get_object_address' }, +{ oid => '6050', descr => 'view foreign transactions', + proname => 'pg_prepared_fdw_xacts', prorows => '1000', proretset => 't', + provolatile => 'v', prorettype => 'record', proargtypes => '', + proallargtypes => '{oid,xid,oid,oid,text,text}', + proargmodes => '{o,o,o,o,o,o}', + proargnames => '{dbid,transaction,serverid,userid,status,identifier}', + prosrc => 'pg_prepared_fdw_xacts' }, +{ oid => '6051', descr => 'remove foreign transaction', + proname => 'pg_remove_fdw_xact', provolatile => 'v', prorettype => 'bool', + proargtypes => 'xid oid oid', + prosrc => 'pg_remove_fdw_xact' }, +{ oid => '6052', descr => 'resolve foreign transaction', + proname => 'pg_resolve_fdw_xact', provolatile => 'v', prorettype => 'bool', + proargtypes => 'xid oid oid', + prosrc => 'pg_resolve_fdw_xact' }, + { oid => '2079', descr => 'is table visible in search path?', proname => 'pg_table_is_visible', procost => '10', provolatile => 's', prorettype => 'bool', proargtypes => 'oid', prosrc => 'pg_table_is_visible' }, diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h index c14eb54..f76e83d 100644 --- a/src/include/foreign/fdwapi.h +++ b/src/include/foreign/fdwapi.h @@ -13,6 +13,7 @@ #define FDWAPI_H #include "access/parallel.h" +#include "foreign/fdwxact.h" #include "nodes/execnodes.h" #include "nodes/relation.h" @@ -168,6 +169,12 @@ typedef bool (*IsForeignScanParallelSafe_function) (PlannerInfo *root, typedef List *(*ReparameterizeForeignPathByChild_function) (PlannerInfo *root, List *fdw_private, RelOptInfo *child_rel); +typedef bool (*PrepareForeignTransaction_function) (ForeignTransaction *foreign_xact); +typedef bool (*CommitForeignTransaction_function) (ForeignTransaction *foreign_xact); +typedef bool (*RollbackForeignTransaction_function) (ForeignTransaction *foreing_xact); +typedef bool (*ResolveForeignTransaction_function) 
(ForeignTransaction *foreign_xact, + bool is_commit); +typedef bool (*IsTwoPhaseCommitEnabled_function) (Oid serverid); /* * FdwRoutine is the struct returned by a foreign-data wrapper's handler @@ -235,6 +242,13 @@ typedef struct FdwRoutine /* Support functions for IMPORT FOREIGN SCHEMA */ ImportForeignSchema_function ImportForeignSchema; + /* Support functions for distributed transactions */ + PrepareForeignTransaction_function PrepareForeignTransaction; + CommitForeignTransaction_function CommitForeignTransaction; + RollbackForeignTransaction_function RollbackForeignTransaction; + ResolveForeignTransaction_function ResolveForeignTransaction; + IsTwoPhaseCommitEnabled_function IsTwoPhaseCommitEnabled; + /* Support functions for parallelism under Gather node */ IsForeignScanParallelSafe_function IsForeignScanParallelSafe; EstimateDSMForeignScan_function EstimateDSMForeignScan; @@ -247,7 +261,6 @@ typedef struct FdwRoutine ReparameterizeForeignPathByChild_function ReparameterizeForeignPathByChild; } FdwRoutine; - /* Functions in foreign/foreign.c */ extern FdwRoutine *GetFdwRoutine(Oid fdwhandler); extern Oid GetForeignServerIdByRelId(Oid relid); @@ -258,4 +271,7 @@ extern bool IsImportableForeignTable(const char *tablename, ImportForeignSchemaStmt *stmt); extern Path *GetExistingLocalJoinPath(RelOptInfo *joinrel); +/* Functions in foreign/fdwxact.c */ +extern void FdwXactRegisterForeignTransaction(Oid serverid, Oid userid, char *fx_id); + #endif /* FDWAPI_H */ diff --git a/src/include/foreign/fdwxact.h b/src/include/foreign/fdwxact.h new file mode 100644 index 0000000..5138a2c --- /dev/null +++ b/src/include/foreign/fdwxact.h @@ -0,0 +1,147 @@ +/* + * fdwxact.h + * + * PostgreSQL distributed transaction manager + * + * Portions Copyright (c) 2018, PostgreSQL Global Development Group + * + * src/include/foreign/fdwxact.h + */ +#ifndef FDW_XACT_H +#define FDW_XACT_H + +#include "access/xlogreader.h" +#include "foreign/foreign.h" +#include "foreign/fdwxact_xlog.h" 
+#include "lib/stringinfo.h" +#include "miscadmin.h" +#include "nodes/pg_list.h" +#include "nodes/execnodes.h" +#include "storage/backendid.h" +#include "storage/proc.h" +#include "storage/shmem.h" +#include "utils/guc.h" +#include "utils/timeout.h" +#include "utils/timestamp.h" + +#define FDW_XACT_NOT_WAITING 0 +#define FDW_XACT_WAITING 1 +#define FDW_XACT_WAIT_COMPLETE 2 + +#define FdwXactEnabled() (max_prepared_foreign_xacts > 0) + +/* Maximum length of the prepared transaction id, borrowed from twophase.c */ +#define FDW_XACT_ID_MAX_LEN 200 + +/* Enum to track the status of prepared foreign transaction */ +typedef enum +{ + FDW_XACT_INITIAL, + FDW_XACT_PREPARING, /* foreign transaction is being prepared */ + FDW_XACT_PREPARED, /* foreign transaction is prepared */ + FDW_XACT_COMMITTING_PREPARED, /* foreign prepared transaction is to + * be committed */ + FDW_XACT_ABORTING_PREPARED, /* foreign prepared transaction is to be + * aborted */ +} FdwXactStatus; + +/* Shared memory entry for a prepared or being prepared foreign transaction */ +typedef struct FdwXactData *FdwXact; + +typedef struct FdwXactData +{ + FdwXact fxact_free_next; /* Next free FdwXact entry */ + FdwXact fxact_next; /* Pointer to the next FdwXact entry associated + * with the same transaction */ + Oid dbid; /* database oid where to find foreign server + * and user mapping */ + TransactionId local_xid; /* XID of local transaction */ + Oid serverid; /* foreign server where transaction takes + * place */ + Oid userid; /* user who initiated the foreign transaction */ + FdwXactStatus status; /* The state of the foreign + * transaction. This doubles as the + * action to be taken on this entry. */ + + /* + * Note that we need to keep track of two LSNs for each FdwXact. We keep + * track of the start LSN because this is the address we must use to read + * state data back from WAL when committing a FdwXact. We keep track of + * the end LSN because that is the LSN we need to wait for prior to + * commit. 
+ */ + XLogRecPtr insert_start_lsn; /* XLOG offset of inserting this entry start */ + XLogRecPtr insert_end_lsn; /* XLOG offset of inserting this entry end */ + + bool valid; /* Has the entry been complete and written to file? */ + BackendId registered_backend; /* Backend who registered this entry */ + bool ondisk; /* TRUE if prepare state file is on disk */ + bool inredo; /* TRUE if entry was added via xlog_redo */ + char fdw_xact_id[FDW_XACT_MAX_ID_LEN]; /* prepared transaction identifier */ +} FdwXactData; + +/* Shared memory layout for maintaining foreign prepared transaction entries. */ +typedef struct +{ + /* Head of linked list of free FdwXactData structs */ + FdwXact freeFdwXacts; + + /* Number of valid foreign transaction entries */ + int numFdwXacts; + + /* Upto max_prepared_foreign_xacts entries in the array */ + FdwXact fdw_xacts[FLEXIBLE_ARRAY_MEMBER]; /* Variable length array */ +} FdwXactCtlData; + +/* Pointer to the shared memory holding the foreign transactions data */ +FdwXactCtlData *FdwXactCtl; + +/* Struct for foreign transaction resolution */ +typedef struct FdwXactResolveState +{ + Oid dbid; /* database oid */ + TransactionId wait_xid; /* local transaction id waiting to be resolved */ + PGPROC *waiter; /* backend process waiter */ + FdwXact fdwxact; /* foreign transaction entries to resolve */ +} FdwXactResolveState; + +/* Struct for foreign transaction passed to API */ +typedef struct ForeignTransaction +{ + ForeignServer *server; + UserMapping *usermapping; + char *fx_id; +} ForeignTransaction; + +/* GUC parameters */ +extern int max_prepared_foreign_xacts; +extern int max_foreign_xact_resolvers; +extern int foreign_xact_resolution_retry_interval; +extern int foreign_xact_resolver_timeout; +extern bool foreign_twophase_commit; + +extern Size FdwXactShmemSize(void); +extern void FdwXactShmemInit(void); +extern void restoreFdwXactData(void); +extern TransactionId PrescanFdwXacts(TransactionId oldestActiveXid); +extern void 
RecoverFdwXacts(void); +extern void AtEOXact_FdwXacts(bool is_commit); +extern void AtPrepare_FdwXacts(void); +extern bool fdw_xact_exists(TransactionId xid, Oid dboid, Oid serverid, + Oid userid); +extern void CheckPointFdwXacts(XLogRecPtr redo_horizon); +extern bool FdwTwoPhaseNeeded(void); +extern void PreCommit_FdwXacts(void); +extern void KnownFdwXactRecreateFiles(XLogRecPtr redo_horizon); +extern void FdwXactWaitToBeResolved(TransactionId wait_xid, bool commit); +extern bool FdwXactResolveDistributedTransaction(FdwXactResolveState *fstate); +extern void FdwXactResolveAllDanglingTransactions(Oid dbid); +extern bool ForeignTwophaseCommitRequired(void); +extern FdwXactResolveState *CreateFdwXactResolveState(void); +extern void FdwXactCleanupAtProcExit(void); +extern void FdwXactMarkForeignTransactionModified(ResultRelInfo *resultRelInfo, + int flags); +extern bool check_foreign_twophase_commit(bool *newval, void **extra, + GucSource source); + +#endif /* FDW_XACT_H */ diff --git a/src/include/foreign/fdwxact_launcher.h b/src/include/foreign/fdwxact_launcher.h new file mode 100644 index 0000000..6ed003b --- /dev/null +++ b/src/include/foreign/fdwxact_launcher.h @@ -0,0 +1,31 @@ +/*------------------------------------------------------------------------- + * + * fdwxact_launcher.h + * PostgreSQL foreign transaction launcher definitions + * + * + * Portions Copyright (c) 2018, PostgreSQL Global Development Group + * + * src/include/foreign/fdwxact_launcher.h + * + *------------------------------------------------------------------------- + */ + +#ifndef _FDWXACT_LAUNCHER_H +#define _FDWXACT_LAUNCHER_H + +#include "foreign/fdwxact.h" + +extern void FdwXactLauncherRegister(void); +extern void FdwXactLauncherMain(Datum main_arg); +extern void FdwXactLauncherWakeup(void); + +extern Size FdwXactRslvShmemSize(void); +extern void FdwXactRslvShmemInit(void); + +extern bool IsFdwXactLauncher(void); + +extern void fdwxact_maybe_launch_resolver(bool ignore_error); + + +#endif 
/* _FDWXACT_LAUNCHER_H */ diff --git a/src/include/foreign/fdwxact_resolver.h b/src/include/foreign/fdwxact_resolver.h new file mode 100644 index 0000000..5afd98c --- /dev/null +++ b/src/include/foreign/fdwxact_resolver.h @@ -0,0 +1,23 @@ +/*------------------------------------------------------------------------- + * + * fdwxact_resolver.h + * PostgreSQL foreign transaction resolver definitions + * + * + * Portions Copyright (c) 2018, PostgreSQL Global Development Group + * + * src/include/foreign/fdwxact_resolver.h + * + *------------------------------------------------------------------------- + */ +#ifndef FDWXACT_RESOLVER_H +#define FDWXACT_RESOLVER_H + +#include "foreign/fdwxact.h" + +extern void FdwXactResolverMain(Datum main_arg); +extern bool IsFdwXactResolver(void); + +extern int foreign_xact_resolver_timeout; + +#endif /* FDWXACT_RESOLVER_H */ diff --git a/src/include/foreign/fdwxact_xlog.h b/src/include/foreign/fdwxact_xlog.h new file mode 100644 index 0000000..f42725e --- /dev/null +++ b/src/include/foreign/fdwxact_xlog.h @@ -0,0 +1,51 @@ +/*------------------------------------------------------------------------- + * + * fdwxact_xlog.h + * Foreign transaction XLOG definitions. 
+ * + * + * Portions Copyright (c) 2018, PostgreSQL Global Development Group + * + * src/include/foreign/fdwxact_xlog.h + * + *------------------------------------------------------------------------- + */ +#ifndef FDWXACT_XLOG_H +#define FDWXACT_XLOG_H + +#include "access/xlogreader.h" +#include "lib/stringinfo.h" + +/* Info types for logs related to FDW transactions */ +#define XLOG_FDW_XACT_INSERT 0x00 +#define XLOG_FDW_XACT_REMOVE 0x10 + +/* Same as GIDSIZE */ +#define FDW_XACT_MAX_ID_LEN 200 +/* + * On disk file structure, also used to WAL + */ +typedef struct +{ + TransactionId local_xid; + Oid dbid; /* database oid where to find foreign server + * and user mapping */ + Oid serverid; /* foreign server where transaction takes + * place */ + Oid userid; /* user who initiated the foreign transaction */ + char fdw_xact_id[FDW_XACT_MAX_ID_LEN]; /* foreign txn prepare id */ +} FdwXactOnDiskData; + +typedef struct xl_fdw_xact_remove +{ + TransactionId xid; + Oid serverid; + Oid userid; + Oid dbid; +} xl_fdw_xact_remove; + +extern void fdw_xact_redo(XLogReaderState *record); +extern void fdw_xact_desc(StringInfo buf, XLogReaderState *record); +extern const char *fdw_xact_identify(uint8 info); + +#endif /* FDWXACT_XLOG_H */ diff --git a/src/include/foreign/foreign.h b/src/include/foreign/foreign.h index 3ca12e6..d030368 100644 --- a/src/include/foreign/foreign.h +++ b/src/include/foreign/foreign.h @@ -68,10 +68,10 @@ typedef struct ForeignTable List *options; /* ftoptions as DefElem list */ } ForeignTable; - extern ForeignServer *GetForeignServer(Oid serverid); extern ForeignServer *GetForeignServerByName(const char *name, bool missing_ok); extern UserMapping *GetUserMapping(Oid userid, Oid serverid); +extern UserMapping *GetUserMappingByOid(Oid umid); extern ForeignDataWrapper *GetForeignDataWrapper(Oid fdwid); extern ForeignDataWrapper *GetForeignDataWrapperByName(const char *name, bool missing_ok); diff --git a/src/include/foreign/resolver_internal.h 
b/src/include/foreign/resolver_internal.h new file mode 100644 index 0000000..9f8676b --- /dev/null +++ b/src/include/foreign/resolver_internal.h @@ -0,0 +1,65 @@ +/*------------------------------------------------------------------------- + * + * resolver_internal.h + * Internal headers shared by fdwxact resolvers. + * + * Portions Copyright (c) 2018, PostgreSQL Global Development Group + * + * src/include/foreign/resolver_internal.h + * + *------------------------------------------------------------------------- + */ + +#ifndef _RESOLVER_INTERNAL_H +#define _RESOLVER_INTERNAL_H + +#include "storage/latch.h" +#include "storage/shmem.h" +#include "storage/spin.h" +#include "utils/timestamp.h" + +/* + * Each foreign transaction resolver has a FdwXactResolver struct in + * shared memory. This struct is protected by FdwXactResolverLaunchLock. + */ +typedef struct FdwXactResolver +{ + pid_t pid; /* this resolver's PID, or 0 if not active */ + Oid dbid; /* database oid */ + + /* Indicates if this slot is used or free */ + bool in_use; + + /* Stats */ + TimestampTz last_resolved_time; + + /* Protect shared variables shown above */ + slock_t mutex; + + /* + * Pointer to the resolver's latch. Used by backends to wake up this + * resolver when it has work to do. NULL if the resolver isn't active. + */ + Latch *latch; +} FdwXactResolver; + +/* There is one FdwXactRslvCtlData struct for the whole database cluster */ +typedef struct FdwXactRslvCtlData +{ + /* + * Foreign transaction resolution queue. Protected by FdwXactLock. 
+ */ + SHM_QUEUE FdwXactQueue; + + /* Supervisor process */ + pid_t launcher_pid; + + FdwXactResolver resolvers[FLEXIBLE_ARRAY_MEMBER]; +} FdwXactRslvCtlData; + +extern FdwXactRslvCtlData *FdwXactRslvCtl; + +extern FdwXactResolver *MyFdwXactResolver; +extern FdwXactRslvCtlData *FdwXactRslvCtl; + +#endif /* _RESOLVER_INTERNAL_H */ diff --git a/src/include/pgstat.h b/src/include/pgstat.h index d59c24a..f74d1be 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -759,6 +759,8 @@ typedef enum WAIT_EVENT_BGWRITER_HIBERNATE, WAIT_EVENT_BGWRITER_MAIN, WAIT_EVENT_CHECKPOINTER_MAIN, + WAIT_EVENT_FDW_XACT_RESOLVER_MAIN, + WAIT_EVENT_FDW_XACT_LAUNCHER_MAIN, WAIT_EVENT_LOGICAL_LAUNCHER_MAIN, WAIT_EVENT_LOGICAL_APPLY_MAIN, WAIT_EVENT_PGSTAT_MAIN, @@ -832,7 +834,8 @@ typedef enum WAIT_EVENT_REPLICATION_ORIGIN_DROP, WAIT_EVENT_REPLICATION_SLOT_DROP, WAIT_EVENT_SAFE_SNAPSHOT, - WAIT_EVENT_SYNC_REP + WAIT_EVENT_SYNC_REP, + WAIT_EVENT_FDW_XACT_RESOLUTION } WaitEventIPC; /* ---------- @@ -912,6 +915,9 @@ typedef enum WAIT_EVENT_TWOPHASE_FILE_READ, WAIT_EVENT_TWOPHASE_FILE_SYNC, WAIT_EVENT_TWOPHASE_FILE_WRITE, + WAIT_EVENT_FDW_XACT_FILE_READ, + WAIT_EVENT_FDW_XACT_FILE_WRITE, + WAIT_EVENT_FDW_XACT_FILE_SYNC, WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ, WAIT_EVENT_WAL_BOOTSTRAP_SYNC, WAIT_EVENT_WAL_BOOTSTRAP_WRITE, diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index cb613c8..45880b2 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -153,6 +153,16 @@ struct PGPROC SHM_QUEUE syncRepLinks; /* list link if process is in syncrep queue */ /* + * Info to allow us to wait for foreign transaction to be resolved, if + * needed. 
+ */ + TransactionId fdwXactWaitXid; /* waiting for foreign transaction involved with + * this transaction id to be resolved */ + int fdwXactState; /* wait state for foreign transaction + * resolution */ + SHM_QUEUE fdwXactLinks; /* list link if process is in queue */ + + /* * All PROCLOCK objects for locks held or awaited by this backend are * linked into one of these lists, according to the partition number of * their lock. diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index 75bab29..25d6a2f 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -36,6 +36,8 @@ #define PROCARRAY_SLOTS_XMIN 0x20 /* replication slot xmin, * catalog_xmin */ +#define PROCARRAY_FDW_XACT_XMIN 0x40 /* unresolved distributed + transaction xmin */ /* * Only flags in PROCARRAY_PROC_FLAGS_MASK are considered when matching * PGXACT->vacuumFlags. Other flags are used for different purposes and @@ -124,4 +126,7 @@ extern void ProcArraySetReplicationSlotXmin(TransactionId xmin, extern void ProcArrayGetReplicationSlotXmin(TransactionId *xmin, TransactionId *catalog_xmin); + +extern void ProcArraySetFdwXactUnresolvedXmin(TransactionId xmin); +extern TransactionId ProcArrayGetFdwXactUnresolvedXmin(void); #endif /* PROCARRAY_H */ diff --git a/src/include/utils/guc_tables.h b/src/include/utils/guc_tables.h index 668d9ef..81560bd 100644 --- a/src/include/utils/guc_tables.h +++ b/src/include/utils/guc_tables.h @@ -94,6 +94,8 @@ enum config_group CLIENT_CONN_PRELOAD, CLIENT_CONN_OTHER, LOCK_MANAGEMENT, + FDWXACT, + FDWXACT_SETTINGS, COMPAT_OPTIONS, COMPAT_OPTIONS_PREVIOUS, COMPAT_OPTIONS_CLIENT, diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 078129f..31502a0 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1413,6 +1413,13 @@ pg_policies| SELECT n.nspname AS schemaname, FROM ((pg_policy pol JOIN pg_class c ON ((c.oid = pol.polrelid))) LEFT JOIN 
pg_namespace n ON ((n.oid = c.relnamespace))); +pg_prepared_fdw_xacts| SELECT f.dbid, + f.transaction, + f.serverid, + f.userid, + f.status, + f.identifier + FROM pg_prepared_fdw_xacts() f(dbid, transaction, serverid, userid, status, identifier); pg_prepared_statements| SELECT p.name, p.statement, p.prepare_time, @@ -1821,6 +1828,11 @@ pg_stat_database_conflicts| SELECT d.oid AS datid, pg_stat_get_db_conflict_bufferpin(d.oid) AS confl_bufferpin, pg_stat_get_db_conflict_startup_deadlock(d.oid) AS confl_deadlock FROM pg_database d; +pg_stat_fdwxact_resolvers| SELECT r.pid, + r.dbid, + r.last_resolved_time + FROM pg_stat_get_fdwxact_resolver() r(pid, dbid, n_entries, last_resolved_time) + WHERE (r.pid IS NOT NULL); pg_stat_progress_vacuum| SELECT s.pid, s.datid, d.datname, -- 2.10.5