diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c new file mode 100644 index a44c1b2..3a506e4 *** a/contrib/hstore/hstore_io.c --- b/contrib/hstore/hstore_io.c *************** hstore_from_record(PG_FUNCTION_ARGS) *** 839,844 **** --- 839,845 ---- ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; + HeapTupleSetZeroBase(&tuple); values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); *************** hstore_populate_record(PG_FUNCTION_ARGS) *** 985,990 **** --- 986,992 ---- ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; + HeapTupleSetZeroBase(&tuple); } /* diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c new file mode 100644 index 4f83467..da26496 *** a/contrib/pageinspect/btreefuncs.c --- b/contrib/pageinspect/btreefuncs.c *************** bt_page_stats(PG_FUNCTION_ARGS) *** 225,231 **** values[j++] = psprintf("%d", stat.free_size); values[j++] = psprintf("%d", stat.btpo_prev); values[j++] = psprintf("%d", stat.btpo_next); ! values[j++] = psprintf("%d", (stat.type == 'd') ? stat.btpo.xact : stat.btpo.level); values[j++] = psprintf("%d", stat.btpo_flags); tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), --- 225,234 ---- values[j++] = psprintf("%d", stat.free_size); values[j++] = psprintf("%d", stat.btpo_prev); values[j++] = psprintf("%d", stat.btpo_next); ! if (stat.type == 'd') ! values[j++] = psprintf(XID_FMT, stat.btpo.xact); ! else ! 
values[j++] = psprintf("%d", stat.btpo.level); values[j++] = psprintf("%d", stat.btpo_flags); tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), diff --git a/contrib/pageinspect/expected/btree.out b/contrib/pageinspect/expected/btree.out new file mode 100644 index 7f92249..5a94dc2 *** a/contrib/pageinspect/expected/btree.out --- b/contrib/pageinspect/expected/btree.out *************** live_items | 1 *** 21,27 **** dead_items | 0 avg_item_size | 16 page_size | 8192 ! free_size | 8132 btpo_prev | 0 btpo_next | 0 btpo | 0 --- 21,27 ---- dead_items | 0 avg_item_size | 16 page_size | 8192 ! free_size | 8124 btpo_prev | 0 btpo_next | 0 btpo | 0 diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c new file mode 100644 index ca4d3f5..c0b80d5 *** a/contrib/pageinspect/heapfuncs.c --- b/contrib/pageinspect/heapfuncs.c *************** heap_page_items(PG_FUNCTION_ARGS) *** 194,199 **** --- 194,200 ---- lp_offset == MAXALIGN(lp_offset) && lp_offset + lp_len <= raw_page_size) { + HeapTupleData tup; HeapTupleHeader tuphdr; bytea *tuple_data_bytea; int tuple_data_len; *************** heap_page_items(PG_FUNCTION_ARGS) *** 201,209 **** /* Extract information from the tuple header */ tuphdr = (HeapTupleHeader) PageGetItem(page, id); ! values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr)); ! values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr)); /* shared with xvac */ values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr)); values[7] = PointerGetDatum(&tuphdr->t_ctid); --- 202,212 ---- /* Extract information from the tuple header */ tuphdr = (HeapTupleHeader) PageGetItem(page, id); + tup.t_data = tuphdr; + HeapTupleCopyBaseFromPage(&tup, page); ! values[4] = TransactionIdGetDatum(HeapTupleGetXmin(&tup)); ! 
values[5] = TransactionIdGetDatum(HeapTupleGetRawXmax(&tup)); /* shared with xvac */ values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr)); values[7] = PointerGetDatum(&tuphdr->t_ctid); diff --git a/contrib/pageinspect/pageinspect--1.5.sql b/contrib/pageinspect/pageinspect--1.5.sql new file mode 100644 index 1e40c3c..fdbd299 *** a/contrib/pageinspect/pageinspect--1.5.sql --- b/contrib/pageinspect/pageinspect--1.5.sql *************** CREATE FUNCTION page_header(IN page byte *** 28,33 **** --- 28,35 ---- OUT special smallint, OUT pagesize smallint, OUT version smallint, + OUT xid_base xid, + OUT multi_base xid, OUT prune_xid xid) AS 'MODULE_PATHNAME', 'page_header' LANGUAGE C STRICT PARALLEL SAFE; diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c new file mode 100644 index e374a9b..756e1e1 *** a/contrib/pageinspect/rawpage.c --- b/contrib/pageinspect/rawpage.c *************** *** 18,23 **** --- 18,24 ---- #include "pageinspect.h" #include "access/htup_details.h" + #include "commands/sequence.h" #include "catalog/catalog.h" #include "catalog/namespace.h" #include "catalog/pg_type.h" *************** page_header(PG_FUNCTION_ARGS) *** 220,227 **** Datum result; HeapTuple tuple; ! Datum values[9]; ! bool nulls[9]; PageHeader page; XLogRecPtr lsn; --- 221,228 ---- Datum result; HeapTuple tuple; ! Datum values[11]; ! bool nulls[11]; PageHeader page; XLogRecPtr lsn; *************** page_header(PG_FUNCTION_ARGS) *** 270,292 **** values[5] = UInt16GetDatum(page->pd_special); values[6] = UInt16GetDatum(PageGetPageSize(page)); values[7] = UInt16GetDatum(PageGetPageLayoutVersion(page)); if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HeapPageSpecialData))) { ! HeapPageSpecial pageSpecial = HeapPageGetSpecial(page); if (pageSpecial->pd_magic == HEAP_PAGE_MAGIC || pageSpecial->pd_magic == SEQ_PAGE_MAGIC) { ! 
values[8] = TransactionIdGetDatum(pageSpecial->pd_prune_xid); nulls[8] = false; } else { nulls[8] = true; } } else { nulls[8] = true; } /* Build and return the tuple. */ --- 271,302 ---- values[5] = UInt16GetDatum(page->pd_special); values[6] = UInt16GetDatum(PageGetPageSize(page)); values[7] = UInt16GetDatum(PageGetPageLayoutVersion(page)); + if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HeapPageSpecialData))) { ! HeapPageSpecial pageSpecial = HeapPageGetSpecial(page); if (pageSpecial->pd_magic == HEAP_PAGE_MAGIC || pageSpecial->pd_magic == SEQ_PAGE_MAGIC) { ! values[8] = TransactionIdGetDatum(pageSpecial->pd_xid_base); ! values[9] = TransactionIdGetDatum(pageSpecial->pd_multi_base); ! values[10] = TransactionIdGetDatum(HeapPageGetPruneXid(page)); nulls[8] = false; + nulls[9] = false; + nulls[10] = false; } else { nulls[8] = true; + nulls[9] = true; + nulls[10] = true; } } else { nulls[8] = true; + nulls[9] = true; + nulls[10] = true; } /* Build and return the tuple. */ diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c new file mode 100644 index 2cc9575..1d91635 *** a/contrib/pg_visibility/pg_visibility.c --- b/contrib/pg_visibility/pg_visibility.c *************** tuple_all_visible(HeapTuple tup, Transac *** 755,761 **** * be set here. So just check the xmin. */ ! xmin = HeapTupleHeaderGetXmin(tup->t_data); if (!TransactionIdPrecedes(xmin, OldestXmin)) return false; /* xmin not old enough for all to see */ --- 755,761 ---- * be set here. So just check the xmin. */ ! 
xmin = HeapTupleGetXmin(tup); if (!TransactionIdPrecedes(xmin, OldestXmin)) return false; /* xmin not old enough for all to see */ diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c new file mode 100644 index eabca65..c5ba475 *** a/contrib/pgrowlocks/pgrowlocks.c --- b/contrib/pgrowlocks/pgrowlocks.c *************** pgrowlocks(PG_FUNCTION_ARGS) *** 152,158 **** htsu = HeapTupleSatisfiesUpdate(tuple, GetCurrentCommandId(false), scan->rs_cbuf); ! xmax = HeapTupleHeaderGetRawXmax(tuple->t_data); infomask = tuple->t_data->t_infomask; /* --- 152,158 ---- htsu = HeapTupleSatisfiesUpdate(tuple, GetCurrentCommandId(false), scan->rs_cbuf); ! xmax = HeapTupleGetRawXmax(tuple); infomask = tuple->t_data->t_infomask; /* *************** pgrowlocks(PG_FUNCTION_ARGS) *** 168,174 **** PointerGetDatum(&tuple->t_self)); values[Atnum_xmax] = palloc(NCHARS * sizeof(char)); ! snprintf(values[Atnum_xmax], NCHARS, "%d", xmax); if (infomask & HEAP_XMAX_IS_MULTI) { MultiXactMember *members; --- 168,174 ---- PointerGetDatum(&tuple->t_self)); values[Atnum_xmax] = palloc(NCHARS * sizeof(char)); ! snprintf(values[Atnum_xmax], NCHARS, XID_FMT, xmax); if (infomask & HEAP_XMAX_IS_MULTI) { MultiXactMember *members; *************** pgrowlocks(PG_FUNCTION_ARGS) *** 209,215 **** strcat(values[Atnum_modes], ","); strcat(values[Atnum_pids], ","); } ! snprintf(buf, NCHARS, "%d", members[j].xid); strcat(values[Atnum_xids], buf); switch (members[j].status) { --- 209,215 ---- strcat(values[Atnum_modes], ","); strcat(values[Atnum_pids], ","); } ! snprintf(buf, NCHARS, XID_FMT, members[j].xid); strcat(values[Atnum_xids], buf); switch (members[j].status) { *************** pgrowlocks(PG_FUNCTION_ARGS) *** 250,256 **** values[Atnum_ismulti] = pstrdup("false"); values[Atnum_xids] = palloc(NCHARS * sizeof(char)); ! 
snprintf(values[Atnum_xids], NCHARS, "{%d}", xmax); values[Atnum_modes] = palloc(NCHARS); if (infomask & HEAP_XMAX_LOCK_ONLY) --- 250,256 ---- values[Atnum_ismulti] = pstrdup("false"); values[Atnum_xids] = palloc(NCHARS * sizeof(char)); ! snprintf(values[Atnum_xids], NCHARS, "{" XID_FMT "}", xmax); values[Atnum_modes] = palloc(NCHARS); if (infomask & HEAP_XMAX_LOCK_ONLY) diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c new file mode 100644 index 5bf0613..3cfdb19 *** a/contrib/pgstattuple/pgstatapprox.c --- b/contrib/pgstattuple/pgstatapprox.c *************** statapprox_heap(Relation rel, output_typ *** 151,156 **** --- 151,157 ---- tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyBaseFromPage(&tuple, page); /* * We count live and dead tuples, but we also need to add up diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c new file mode 100644 index fb65e2e..64a0bea *** a/contrib/postgres_fdw/postgres_fdw.c --- b/contrib/postgres_fdw/postgres_fdw.c *************** make_tuple_from_result_row(PGresult *res *** 5050,5055 **** --- 5050,5056 ---- */ if (ctid) tuple->t_self = tuple->t_data->t_ctid = *ctid; + HeapTupleSetZeroBase(tuple); /* * Stomp on the xmin, xmax, and cmin fields from the tuple created by *************** make_tuple_from_result_row(PGresult *res *** 5059,5066 **** * assumption. If we don't do this then, for example, the tuple length * ends up in the xmin field, which isn't what we want. */ ! HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); ! HeapTupleHeaderSetXmin(tuple->t_data, InvalidTransactionId); HeapTupleHeaderSetCmin(tuple->t_data, InvalidTransactionId); /* --- 5060,5067 ---- * assumption. If we don't do this then, for example, the tuple length * ends up in the xmin field, which isn't what we want. */ ! HeapTupleSetXmax(tuple, InvalidTransactionId); ! 
HeapTupleSetXmin(tuple, InvalidTransactionId); HeapTupleHeaderSetCmin(tuple->t_data, InvalidTransactionId); /* diff --git a/contrib/test_decoding/test_decoding.c b/contrib/test_decoding/test_decoding.c new file mode 100644 index 135b3b7..7acd9a4 *** a/contrib/test_decoding/test_decoding.c --- b/contrib/test_decoding/test_decoding.c *************** pg_output_begin(LogicalDecodingContext * *** 215,221 **** { OutputPluginPrepareWrite(ctx, last_write); if (data->include_xids) ! appendStringInfo(ctx->out, "BEGIN %u", txn->xid); else appendStringInfoString(ctx->out, "BEGIN"); OutputPluginWrite(ctx, last_write); --- 215,221 ---- { OutputPluginPrepareWrite(ctx, last_write); if (data->include_xids) ! appendStringInfo(ctx->out, "BEGIN " XID_FMT, txn->xid); else appendStringInfoString(ctx->out, "BEGIN"); OutputPluginWrite(ctx, last_write); *************** pg_decode_commit_txn(LogicalDecodingCont *** 233,239 **** OutputPluginPrepareWrite(ctx, true); if (data->include_xids) ! appendStringInfo(ctx->out, "COMMIT %u", txn->xid); else appendStringInfoString(ctx->out, "COMMIT"); --- 233,239 ---- OutputPluginPrepareWrite(ctx, true); if (data->include_xids) ! appendStringInfo(ctx->out, "COMMIT " XID_FMT, txn->xid); else appendStringInfoString(ctx->out, "COMMIT"); diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml new file mode 100644 index 9af77c1..b520131 *** a/doc/src/sgml/catalogs.sgml --- b/doc/src/sgml/catalogs.sgml *************** SCRAM-SHA-256$<iteration *** 1919,1926 **** All transaction IDs before this one have been replaced with a permanent (frozen) transaction ID in this table. This is used to track ! whether the table needs to be vacuumed in order to prevent transaction ! ID wraparound or to allow pg_xact to be shrunk. Zero (InvalidTransactionId) if the relation is not a table. --- 1919,1926 ---- All transaction IDs before this one have been replaced with a permanent (frozen) transaction ID in this table. This is used to track ! 
whether the table needs to be vacuumed in order to allow ! pg_xact to be shrunk. Zero (InvalidTransactionId) if the relation is not a table. *************** SCRAM-SHA-256$<iteration *** 1932,1939 **** All multixact IDs before this one have been replaced by a transaction ID in this table. This is used to track ! whether the table needs to be vacuumed in order to prevent multixact ID ! wraparound or to allow pg_multixact to be shrunk. Zero (InvalidMultiXactId) if the relation is not a table. --- 1932,1939 ---- All multixact IDs before this one have been replaced by a transaction ID in this table. This is used to track ! whether the table needs to be vacuumed in order to allow ! pg_multixact to be shrunk. Zero (InvalidMultiXactId) if the relation is not a table. *************** SCRAM-SHA-256$<iteration *** 2615,2622 **** All transaction IDs before this one have been replaced with a permanent (frozen) transaction ID in this database. This is used to ! track whether the database needs to be vacuumed in order to prevent ! transaction ID wraparound or to allow pg_xact to be shrunk. It is the minimum of the per-table pg_class.relfrozenxid values. --- 2615,2622 ---- All transaction IDs before this one have been replaced with a permanent (frozen) transaction ID in this database. This is used to ! track whether the database needs to be vacuumed in order to allow ! pg_xact to be shrunk. It is the minimum of the per-table pg_class.relfrozenxid values. *************** SCRAM-SHA-256$<iteration *** 2629,2636 **** All multixact IDs before this one have been replaced with a transaction ID in this database. This is used to ! track whether the database needs to be vacuumed in order to prevent ! multixact ID wraparound or to allow pg_multixact to be shrunk. It is the minimum of the per-table pg_class.relminmxid values. --- 2629,2636 ---- All multixact IDs before this one have been replaced with a transaction ID in this database. This is used to ! 
track whether the database needs to be vacuumed in order to allow ! pg_multixact to be shrunk. It is the minimum of the per-table pg_class.relminmxid values. diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml new file mode 100644 index 5f59a38..ac25cb3 *** a/doc/src/sgml/config.sgml --- b/doc/src/sgml/config.sgml *************** include_dir 'conf.d' *** 2153,2159 **** 1min) are only allowed because they may sometimes be useful for testing. While a setting as high as 60d is allowed, please note that in many workloads extreme bloat or ! transaction ID wraparound may occur in much shorter time frames. --- 2153,2160 ---- 1min) are only allowed because they may sometimes be useful for testing. While a setting as high as 60d is allowed, please note that in many workloads extreme bloat or ! page-level transaction ID wraparound may occur in much shorter time ! frames. *************** ANY num_s *** 3198,3204 **** ! vacuum_defer_cleanup_age (integer) vacuum_defer_cleanup_age configuration parameter --- 3199,3205 ---- ! vacuum_defer_cleanup_age (integer 64) vacuum_defer_cleanup_age configuration parameter *************** COPY postgres_log FROM '/full/path/to/lo *** 5896,5903 **** Note that even when this parameter is disabled, the system ! will launch autovacuum processes if necessary to ! prevent transaction ID wraparound. See for more information. --- 5897,5904 ---- Note that even when this parameter is disabled, the system ! will launch autovacuum processes if necessary to shrink ! pg_clog and pg_multixact. See for more information. *************** COPY postgres_log FROM '/full/path/to/lo *** 6041,6047 **** ! autovacuum_freeze_max_age (integer) autovacuum_freeze_max_age configuration parameter --- 6042,6048 ---- ! 
autovacuum_freeze_max_age (integer 64) autovacuum_freeze_max_age configuration parameter *************** COPY postgres_log FROM '/full/path/to/lo *** 6051,6065 **** Specifies the maximum age (in transactions) that a table's pg_class.relfrozenxid field can attain before a VACUUM operation is forced ! to prevent transaction ID wraparound within the table. ! Note that the system will launch autovacuum processes to ! prevent wraparound even when autovacuum is otherwise disabled. Vacuum also allows removal of old files from the ! pg_xact subdirectory, which is why the default ! is a relatively low 200 million transactions. This parameter can only be set at server start, but the setting can be reduced for individual tables by changing table storage parameters. --- 6052,6065 ---- Specifies the maximum age (in transactions) that a table's pg_class.relfrozenxid field can attain before a VACUUM operation is forced ! to shrink pg_clog. ! Note that the system will launch autovacuum processes for this purpose ! even when autovacuum is otherwise disabled. Vacuum also allows removal of old files from the ! pg_xact subdirectory. This parameter can only be set at server start, but the setting can be reduced for individual tables by changing table storage parameters. *************** COPY postgres_log FROM '/full/path/to/lo *** 6069,6075 **** ! autovacuum_multixact_freeze_max_age (integer) autovacuum_multixact_freeze_max_age configuration parameter --- 6069,6075 ---- ! autovacuum_multixact_freeze_max_age (integer 64) autovacuum_multixact_freeze_max_age configuration parameter *************** COPY postgres_log FROM '/full/path/to/lo *** 6078,6094 **** Specifies the maximum age (in multixacts) that a table's pg_class.relminmxid field can ! attain before a VACUUM operation is forced to ! prevent multixact ID wraparound within the table. ! Note that the system will launch autovacuum processes to ! prevent wraparound even when autovacuum is otherwise disabled. 
- Vacuuming multixacts also allows removal of old files from the - pg_multixact/members and pg_multixact/offsets - subdirectories, which is why the default is a relatively low - 400 million multixacts. This parameter can only be set at server start, but the setting can be reduced for individual tables by changing table storage parameters. For more information see . --- 6078,6090 ---- Specifies the maximum age (in multixacts) that a table's pg_class.relminmxid field can ! attain before a VACUUM operation is forced ! to shrink pg_multixact. ! Note that the system will launch autovacuum processes for this ! purpose even when autovacuum is otherwise disabled. This parameter can only be set at server start, but the setting can be reduced for individual tables by changing table storage parameters. For more information see . *************** COPY postgres_log FROM '/full/path/to/lo *** 6554,6560 **** ! vacuum_freeze_table_age (integer) vacuum_freeze_table_age configuration parameter --- 6550,6556 ---- ! vacuum_freeze_table_age (integer 64) vacuum_freeze_table_age configuration parameter *************** COPY postgres_log FROM '/full/path/to/lo *** 6567,6585 **** a regular VACUUM in that it visits every page that might contain unfrozen XIDs or MXIDs, not just those that might contain dead tuples. The default is 150 million transactions. Although users can ! set this value anywhere from zero to two billions, VACUUM will silently limit the effective value to 95% of , so that a periodical manual VACUUM has a chance to run before an ! anti-wraparound autovacuum is launched for the table. For more ! information see . ! vacuum_freeze_min_age (integer) vacuum_freeze_min_age configuration parameter --- 6563,6581 ---- a regular VACUUM in that it visits every page that might contain unfrozen XIDs or MXIDs, not just those that might contain dead tuples. The default is 150 million transactions. Although users can ! 
set this value anywhere from zero to 2^63 - 1, VACUUM will silently limit the effective value to 95% of , so that a periodical manual VACUUM has a chance to run before an ! autovacuum to shrink pg_clog and pg_multixact ! is launched for the table. For more information see . ! vacuum_freeze_min_age (integer 64) vacuum_freeze_min_age configuration parameter *************** COPY postgres_log FROM '/full/path/to/lo *** 6590,6596 **** should use to decide whether to freeze row versions while scanning a table. The default is 50 million transactions. Although ! users can set this value anywhere from zero to one billion, VACUUM will silently limit the effective value to half the value of , so that there is not an unreasonably short time between forced --- 6586,6592 ---- should use to decide whether to freeze row versions while scanning a table. The default is 50 million transactions. Although ! users can set this value anywhere from zero to 2^63 - 1, VACUUM will silently limit the effective value to half the value of , so that there is not an unreasonably short time between forced *************** COPY postgres_log FROM '/full/path/to/lo *** 6601,6607 **** ! vacuum_multixact_freeze_table_age (integer) vacuum_multixact_freeze_table_age configuration parameter --- 6597,6603 ---- ! vacuum_multixact_freeze_table_age (integer 64) vacuum_multixact_freeze_table_age configuration parameter *************** COPY postgres_log FROM '/full/path/to/lo *** 6614,6631 **** a regular VACUUM in that it visits every page that might contain unfrozen XIDs or MXIDs, not just those that might contain dead tuples. The default is 150 million multixacts. ! Although users can set this value anywhere from zero to two billions, VACUUM will silently limit the effective value to 95% of , so that a periodical manual VACUUM has a chance to run before an ! anti-wraparound is launched for the table. For more information see . ! 
vacuum_multixact_freeze_min_age (integer) vacuum_multixact_freeze_min_age configuration parameter --- 6610,6628 ---- a regular VACUUM in that it visits every page that might contain unfrozen XIDs or MXIDs, not just those that might contain dead tuples. The default is 150 million multixacts. ! Although users can set this value anywhere from zero to 2^63 - 1, VACUUM will silently limit the effective value to 95% of , so that a periodical manual VACUUM has a chance to run before an ! autovacuum to shrink pg_clog and pg_multixact ! is launched for the table. For more information see . ! vacuum_multixact_freeze_min_age (integer 64) vacuum_multixact_freeze_min_age configuration parameter *************** COPY postgres_log FROM '/full/path/to/lo *** 6636,6642 **** should use to decide whether to replace multixact IDs with a newer transaction ID or multixact ID while scanning a table. The default is 5 million multixacts. ! Although users can set this value anywhere from zero to one billion, VACUUM will silently limit the effective value to half the value of , so that there is not an unreasonably short time between forced --- 6633,6639 ---- should use to decide whether to replace multixact IDs with a newer transaction ID or multixact ID while scanning a table. The default is 5 million multixacts. ! Although users can set this value anywhere from zero to 2^63 - 1, VACUUM will silently limit the effective value to half the value of , so that there is not an unreasonably short time between forced diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml new file mode 100644 index 616aece..6a4b151 *** a/doc/src/sgml/maintenance.sgml --- b/doc/src/sgml/maintenance.sgml *************** *** 108,116 **** ! To protect against loss of very old data due to ! transaction ID wraparound or ! multixact ID wraparound. --- 108,115 ---- ! To shrink pg_clog and ! pg_multixact. *************** *** 376,382 **** ! 
Preventing Transaction ID Wraparound Failures transaction ID --- 375,381 ---- ! Forced shrinking of <literal>pg_clog</> and <literal>pg_multixact</> transaction ID *************** *** 394,408 **** depend on being able to compare transaction ID (XID) numbers: a row version with an insertion XID greater than the current transaction's XID is in the future and should not be visible ! to the current transaction. But since transaction IDs have limited size ! (32 bits) a cluster that runs for a long time (more than 4 billion transactions) would suffer transaction ID wraparound: the XID counter wraps around to zero, and all of a sudden transactions that were in the past appear to be in the future — which ! means their output become invisible. In short, catastrophic data loss. ! (Actually the data is still there, but that's cold comfort if you cannot ! get at it.) To avoid this, it is necessary to vacuum every table ! in every database at least once every two billion transactions. --- 393,422 ---- depend on being able to compare transaction ID (XID) numbers: a row version with an insertion XID greater than the current transaction's XID is in the future and should not be visible ! to the current transaction. In older versions transaction IDs had ! limited size (32 bits), and a cluster that runs for a long time (more than 4 billion transactions) would suffer transaction ID wraparound: the XID counter wraps around to zero, and all of a sudden transactions that were in the past appear to be in the future — which ! means data loss as their output becomes invisible. ! ! ! ! Postgres Pro Enterprise 9.6 introduced ! 64-bit transaction IDs, which are not subject to wraparound and ! do not need modulo-2^32 arithmetic to be compared. ! Each tuple header contains two XIDs, so extending them would lead to ! high overhead. For that reason on-page XIDs are still 32-bit, but ! each page's header contains an offset, called epoch, ! to which they are added before comparing with each other. ! ! ! 
! When a new XID can't fit on an existing page according to its epoch, ! that epoch is shifted. A single-page freeze takes place if ! needed. Both actions are performed "on the fly". Page-level wraparound ! can happen only when someone holds a snapshot which is more than 4 billion ! transactions old. *************** *** 411,435 **** they were inserted by a transaction that committed sufficiently far in the past that the effects of the inserting transaction are certain to be visible to all current and future transactions. ! Normal XIDs are ! compared using modulo-232 arithmetic. This means ! that for every normal XID, there are two billion XIDs that are ! older and two billion that are newer; another ! way to say it is that the normal XID space is circular with no ! endpoint. Therefore, once a row version has been created with a particular ! normal XID, the row version will appear to be in the past for ! the next two billion transactions, no matter which normal XID we are ! talking about. If the row version still exists after more than two billion ! transactions, it will suddenly appear to be in the future. To ! prevent this, PostgreSQL reserves a special XID, ! FrozenTransactionId, which does not follow the normal XID ! comparison rules and is always considered older than every normal XID. Frozen row versions are treated as if the inserting XID were FrozenTransactionId, so that they will appear to be ! in the past to all normal transactions regardless of wraparound ! issues, and so such row versions will be valid until deleted, no matter ! how long that is. --- 425,445 ---- they were inserted by a transaction that committed sufficiently far in the past that the effects of the inserting transaction are certain to be visible to all current and future transactions. ! &productname; reserves a special XID, ! FrozenTransactionId, which is always considered older than every normal XID. 
Frozen row versions are treated as if the inserting XID were FrozenTransactionId, so that they will appear to be ! in the past to all normal transactions. ! ! ! ! Freezing data by VACUUM is not needed anymore for preventing ! wraparound, since page-level freeze happens "on the fly". However, ! freezing data by VACUUM is still needed to shrink ! pg_clog and pg_multixact. For historical reasons, ! the wording "autovacuum to prevent wraparound" is preserved for forced ! autovacuum to shrink pg_clog and pg_multixact. *************** *** 481,492 **** ! The maximum time that a table can go unvacuumed is two billion ! transactions minus the vacuum_freeze_min_age value at ! the time of the last aggressive vacuum. If it were to go ! unvacuumed for longer than ! that, data loss could result. To ensure that this does not happen, ! autovacuum is invoked on any table that might contain unfrozen rows with XIDs older than the age specified by the configuration parameter . (This will happen even if autovacuum is disabled.) --- 491,497 ---- ! Autovacuum is invoked on any table that might contain unfrozen rows with XIDs older than the age specified by the configuration parameter . (This will happen even if autovacuum is disabled.) *************** *** 512,525 **** autovacuum_freeze_max_age; a setting higher than that will be capped to the maximum. A value higher than autovacuum_freeze_max_age wouldn't make sense because an ! anti-wraparound autovacuum would be triggered at that point anyway, and the 0.95 multiplier leaves some breathing room to run a manual VACUUM before that happens. As a rule of thumb, vacuum_freeze_table_age should be set to a value somewhat below autovacuum_freeze_max_age, leaving enough gap so that a regularly scheduled VACUUM or an autovacuum triggered by normal delete and update activity is run in that window. Setting it too ! 
close could lead to anti-wraparound autovacuums, even though the table was recently vacuumed to reclaim space, whereas lower values lead to more frequent aggressive vacuuming. --- 517,532 ---- autovacuum_freeze_max_age; a setting higher than that will be capped to the maximum. A value higher than autovacuum_freeze_max_age wouldn't make sense because an ! autovacuum to shrink pg_clog and pg_multixact ! would be triggered at that point anyway, and the 0.95 multiplier leaves some breathing room to run a manual VACUUM before that happens. As a rule of thumb, vacuum_freeze_table_age should be set to a value somewhat below autovacuum_freeze_max_age, leaving enough gap so that a regularly scheduled VACUUM or an autovacuum triggered by normal delete and update activity is run in that window. Setting it too ! close could lead to autovacuums to shrink pg_clog and ! pg_multixact, even though the table was recently vacuumed to reclaim space, whereas lower values lead to more frequent aggressive vacuuming. *************** SELECT datname, age(datfrozenxid) FROM p *** 605,647 **** be forced for the table. - - If for some reason autovacuum fails to clear old XIDs from a table, - the system will begin to emit warning messages like this when the - database's oldest XIDs reach ten million transactions from the wraparound - point: - - - WARNING: database "mydb" must be vacuumed within 177009986 transactions - HINT: To avoid a database shutdown, execute a database-wide VACUUM in "mydb". - - - (A manual VACUUM should fix the problem, as suggested by the - hint; but note that the VACUUM must be performed by a - superuser, else it will fail to process system catalogs and thus not - be able to advance the database's datfrozenxid.) 
- If these warnings are - ignored, the system will shut down and refuse to start any new - transactions once there are fewer than 1 million transactions left - until wraparound: - - - ERROR: database is not accepting commands to avoid wraparound data loss in database "mydb" - HINT: Stop the postmaster and vacuum that database in single-user mode. - - - The 1-million-transaction safety margin exists to let the - administrator recover without data loss, by manually executing the - required VACUUM commands. However, since the system will not - execute commands once it has gone into the safety shutdown mode, - the only way to do this is to stop the server and start the server in single-user - mode to execute VACUUM. The shutdown mode is not enforced - in single-user mode. See the reference - page for details about using single-user mode. - - ! Multixacts and Wraparound MultiXactId --- 612,619 ---- be forced for the table. ! Shrinking <literal>pg_multixact</> MultiXactId *************** HINT: Stop the postmaster and vacuum th *** 662,673 **** particular multixact ID is stored separately in the pg_multixact subdirectory, and only the multixact ID appears in the xmax field in the tuple header. ! Like transaction IDs, multixact IDs are implemented as a ! 32-bit counter and corresponding storage, all of which requires ! careful aging management, storage cleanup, and wraparound handling. There is a separate storage area which holds the list of members in ! each multixact, which also uses a 32-bit counter and which must also ! be managed. --- 634,645 ---- particular multixact ID is stored separately in the pg_multixact subdirectory, and only the multixact ID appears in the xmax field in the tuple header. ! Like transaction IDs, multixact IDs are implemented on disk page as a ! 64-bit counter with an offset relative to epoch, and corresponding storage, ! which requires ! careful aging management, and storage cleanup. 
There is a separate storage area which holds the list of members in ! each multixact, which uses a 64-bit counter. *************** HINT: Stop the postmaster and vacuum th *** 695,708 **** ! As a safety device, an aggressive vacuum scan will occur for any table whose multixact-age is greater than ! . Aggressive ! vacuum scans will also occur progressively for all tables, starting with ! those that have the oldest multixact-age, if the amount of used member ! storage space exceeds the amount 50% of the addressable storage space. ! Both of these kinds of aggressive scans will occur even if autovacuum is ! nominally disabled. --- 667,675 ---- ! An aggressive vacuum scan will occur for any table whose multixact-age is greater than ! . diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml new file mode 100644 index 824253d..e776ad5 *** a/doc/src/sgml/ref/create_table.sgml --- b/doc/src/sgml/ref/create_table.sgml *************** FROM ( { *** 1188,1198 **** If true, the autovacuum daemon will perform automatic VACUUM and/or ANALYZE operations on this table following the rules discussed in . ! If false, this table will not be autovacuumed, except to prevent ! transaction ID wraparound. See for ! more about wraparound prevention. ! Note that the autovacuum daemon does not run at all (except to prevent ! transaction ID wraparound) if the parameter is false; setting individual tables' storage parameters does not override that. Therefore there is seldom much point in explicitly setting this storage parameter to true, only --- 1188,1199 ---- If true, the autovacuum daemon will perform automatic VACUUM and/or ANALYZE operations on this table following the rules discussed in . ! If false, this table will not be autovacuumed, except to shrink ! pg_clog and pg_multixact. ! See for more about that. ! Note that the autovacuum daemon does not run at all (except to shrink ! pg_clog and pg_multixact) if the ! 
parameter is false; setting individual tables' storage parameters does not override that. Therefore there is seldom much point in explicitly setting this storage parameter to true, only diff --git a/doc/src/sgml/ref/prepare_transaction.sgml b/doc/src/sgml/ref/prepare_transaction.sgml new file mode 100644 index 626753f..62f8a2c *** a/doc/src/sgml/ref/prepare_transaction.sgml --- b/doc/src/sgml/ref/prepare_transaction.sgml *************** PREPARE TRANSACTION ). Keep in mind also that the transaction continues to hold whatever locks it held. The intended usage of the feature is that a prepared transaction will normally be committed or rolled back as soon as an external transaction manager has verified that --- 125,131 ---- It is unwise to leave transactions in the prepared state for a long time. This will interfere with the ability of VACUUM to reclaim ! storage. Keep in mind also that the transaction continues to hold whatever locks it held. The intended usage of the feature is that a prepared transaction will normally be committed or rolled back as soon as an external transaction manager has verified that diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml new file mode 100644 index aed2cf8..ffa9955 *** a/doc/src/sgml/storage.sgml --- b/doc/src/sgml/storage.sgml *************** This information can also be used *** 646,652 **** by index-only scans to answer queries using only the index tuple. The second bit, if set, means that all tuples on the page have been frozen. - That means that even an anti-wraparound vacuum need not revisit the page. --- 646,651 ---- *************** data. Empty in ordinary tables. *** 772,778 **** ! The first 24 bytes of each page consists of a page header (PageHeaderData). Its format is detailed in . The first field tracks the most recent WAL entry related to this page. The second field contains --- 771,777 ---- ! The first 40 bytes of each page consists of a page header (PageHeaderData). Its format is detailed in . 
The first field tracks the most recent WAL entry related to this page. The second field contains *************** data. Empty in ordinary tables. *** 782,788 **** pd_upper, and pd_special). These contain byte offsets from the page start to the start of unallocated space, to the end of ! unallocated space, and to the start of the special space. The next 2 bytes of the page header, pd_pagesize_version, store both the page size and a version indicator. Beginning with PostgreSQL 8.3 the version number is 4; --- 781,789 ---- pd_upper, and pd_special). These contain byte offsets from the page start to the start of unallocated space, to the end of ! unallocated space, and to the start of the special space. ! Next are two 8-byte fields containing the base, i.e. an offset, for ! short (4-byte) transaction IDs in this page. The next 2 bytes of the page header, pd_pagesize_version, store both the page size and a version indicator. Beginning with PostgreSQL 8.3 the version number is 4; *************** data. Empty in ordinary tables. *** 856,863 **** Page size and layout version number information ! pd_prune_xid TransactionId 4 bytes Oldest unpruned XMAX on page, or zero if none --- 857,876 ---- Page size and layout version number information ! pd_xid_base ! TransactionId ! 8 bytes ! Base for short 4-byte transaction IDs in this page ! ! ! pd_multi_base TransactionId + 8 bytes + Base for short 4-byte multixact IDs in this page + + + pd_prune_xid + ShortTransactionId 4 bytes Oldest unpruned XMAX on page, or zero if none *************** data. Empty in ordinary tables. *** 915,921 **** All table rows are structured in the same way. There is a fixed-size ! header (occupying 23 bytes on most machines), followed by an optional null bitmap, an optional object ID field, and the user data. The header is detailed in . The actual user data --- 928,934 ---- All table rows are structured in the same way. There is a fixed-size !
header (occupying 40 bytes on most machines), followed by an optional null bitmap, an optional object ID field, and the user data. The header is detailed in . The actual user data *************** data. Empty in ordinary tables. *** 953,965 **** t_xmin ! TransactionId 4 bytes insert XID stamp t_xmax ! TransactionId 4 bytes delete XID stamp --- 966,978 ---- t_xmin ! ShortTransactionId 4 bytes insert XID stamp t_xmax ! ShortTransactionId 4 bytes delete XID stamp *************** data. Empty in ordinary tables. *** 971,977 **** t_xvac ! TransactionId 4 bytes XID for VACUUM operation moving a row version --- 984,990 ---- t_xvac ! ShortTransactionId 4 bytes XID for VACUUM operation moving a row version diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c new file mode 100644 index 13ee528..2bf84cf *** a/src/backend/access/common/heaptuple.c --- b/src/backend/access/common/heaptuple.c *************** heap_getsysattr(HeapTuple tup, int attnu *** 571,580 **** result = ObjectIdGetDatum(HeapTupleGetOid(tup)); break; case MinTransactionIdAttributeNumber: ! result = TransactionIdGetDatum(HeapTupleHeaderGetRawXmin(tup->t_data)); break; case MaxTransactionIdAttributeNumber: ! result = TransactionIdGetDatum(HeapTupleHeaderGetRawXmax(tup->t_data)); break; case MinCommandIdAttributeNumber: case MaxCommandIdAttributeNumber: --- 571,580 ---- result = ObjectIdGetDatum(HeapTupleGetOid(tup)); break; case MinTransactionIdAttributeNumber: ! result = TransactionIdGetDatum(HeapTupleGetRawXmin(tup)); break; case MaxTransactionIdAttributeNumber: ! 
result = TransactionIdGetDatum(HeapTupleGetRawXmax(tup)); break; case MinCommandIdAttributeNumber: case MaxCommandIdAttributeNumber: *************** heap_copytuple(HeapTuple tuple) *** 619,624 **** --- 619,625 ---- newTuple->t_len = tuple->t_len; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyBase(newTuple, tuple); newTuple->t_data = (HeapTupleHeader) ((char *) newTuple + HEAPTUPLESIZE); memcpy((char *) newTuple->t_data, (char *) tuple->t_data, tuple->t_len); return newTuple; *************** heap_copytuple_with_tuple(HeapTuple src, *** 645,650 **** --- 646,652 ---- dest->t_len = src->t_len; dest->t_self = src->t_self; dest->t_tableOid = src->t_tableOid; + HeapTupleCopyBase(dest, src); dest->t_data = (HeapTupleHeader) palloc(src->t_len); memcpy((char *) dest->t_data, (char *) src->t_data, src->t_len); } *************** heap_form_tuple(TupleDesc tupleDescripto *** 755,760 **** --- 757,763 ---- tuple->t_len = len; ItemPointerSetInvalid(&(tuple->t_self)); tuple->t_tableOid = InvalidOid; + HeapTupleSetZeroBase(tuple); HeapTupleHeaderSetDatumLength(td, len); HeapTupleHeaderSetTypeId(td, tupleDescriptor->tdtypeid); *************** heap_modify_tuple(HeapTuple tuple, *** 843,848 **** --- 846,852 ---- newTuple->t_data->t_ctid = tuple->t_data->t_ctid; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyBase(newTuple, tuple); if (tupleDesc->tdhasoid) HeapTupleSetOid(newTuple, HeapTupleGetOid(tuple)); *************** heap_tuple_from_minimal_tuple(MinimalTup *** 1505,1510 **** --- 1509,1515 ---- result->t_len = len; ItemPointerSetInvalid(&(result->t_self)); result->t_tableOid = InvalidOid; + HeapTupleSetZeroBase(result); result->t_data = (HeapTupleHeader) ((char *) result + HEAPTUPLESIZE); memcpy((char *) result->t_data + MINIMAL_TUPLE_OFFSET, mtup, mtup->t_len); memset(result->t_data, 0, offsetof(HeapTupleHeaderData, t_infomask2)); diff --git a/src/backend/access/common/reloptions.c 
b/src/backend/access/common/reloptions.c new file mode 100644 index 98e25d6..b9341b1 *** a/src/backend/access/common/reloptions.c --- b/src/backend/access/common/reloptions.c *************** static relopt_int intRelOpts[] = *** 231,288 **** }, { { - "autovacuum_freeze_min_age", - "Minimum age at which VACUUM should freeze a table row, for autovacuum", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, - -1, 0, 1000000000 - }, - { - { - "autovacuum_multixact_freeze_min_age", - "Minimum multixact age at which VACUUM should freeze a row multixact's, for autovacuum", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, - -1, 0, 1000000000 - }, - { - { - "autovacuum_freeze_max_age", - "Age at which to autovacuum a table to prevent transaction ID wraparound", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, - -1, 100000, 2000000000 - }, - { - { - "autovacuum_multixact_freeze_max_age", - "Multixact age at which to autovacuum a table to prevent multixact wraparound", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, - -1, 10000, 2000000000 - }, - { - { - "autovacuum_freeze_table_age", - "Age at which VACUUM should perform a full table sweep to freeze row versions", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, -1, 0, 2000000000 - }, - { - { - "autovacuum_multixact_freeze_table_age", - "Age of multixact at which VACUUM should perform a full table sweep to freeze row versions", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, -1, 0, 2000000000 - }, - { - { "log_autovacuum_min_duration", "Sets the minimum execution time above which autovacuum actions will be logged", RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, --- 231,236 ---- *************** static relopt_int intRelOpts[] = *** 329,347 **** }, -1, 0, 1024 }, - /* list terminator */ {{NULL}} }; - static relopt_int64 int64RelOpts[] = { /* list terminator */ {{NULL}} }; - static relopt_real 
realRelOpts[] = { { --- 277,346 ---- }, -1, 0, 1024 }, /* list terminator */ {{NULL}} }; static relopt_int64 int64RelOpts[] = { + { + { + "autovacuum_freeze_min_age", + "Minimum age at which VACUUM should freeze a table row, for autovacuum", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(0), INT64CONST(1000000000) + }, + { + { + "autovacuum_multixact_freeze_min_age", + "Minimum multixact age at which VACUUM should freeze a row multixact's, for autovacuum", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(0), INT64CONST(1000000000) + }, + { + { + "autovacuum_freeze_max_age", + "Age at which to autovacuum a table to prevent transaction ID wraparound", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(100000), INT64CONST(2000000000) + }, + { + { + "autovacuum_multixact_freeze_max_age", + "Multixact age at which to autovacuum a table to prevent multixact wraparound", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(10000), INT64CONST(2000000000) + }, + { + { + "autovacuum_freeze_table_age", + "Age at which VACUUM should perform a full table sweep to freeze row versions", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(0), INT64CONST(2000000000) + }, + { + { + "autovacuum_multixact_freeze_table_age", + "Age of multixact at which VACUUM should perform a full table sweep to freeze row versions", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(0), INT64CONST(2000000000) + }, /* list terminator */ {{NULL}} }; static relopt_real realRelOpts[] = { { *************** initialize_reloptions(void) *** 474,479 **** --- 473,484 ---- int64RelOpts[i].gen.lockmode)); j++; } + for (i = 0; int64RelOpts[i].gen.name; i++) + { + 
Assert(DoLockModesConflict(int64RelOpts[i].gen.lockmode, + int64RelOpts[i].gen.lockmode)); + j++; + } for (i = 0; realRelOpts[i].gen.name; i++) { Assert(DoLockModesConflict(realRelOpts[i].gen.lockmode, *************** default_reloptions(Datum reloptions, boo *** 1400,1416 **** offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_cost_delay)}, {"autovacuum_vacuum_cost_limit", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_cost_limit)}, ! {"autovacuum_freeze_min_age", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_min_age)}, ! {"autovacuum_freeze_max_age", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_max_age)}, ! {"autovacuum_freeze_table_age", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_table_age)}, ! {"autovacuum_multixact_freeze_min_age", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_min_age)}, ! {"autovacuum_multixact_freeze_max_age", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_max_age)}, ! {"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_table_age)}, {"log_autovacuum_min_duration", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, log_min_duration)}, --- 1405,1421 ---- offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_cost_delay)}, {"autovacuum_vacuum_cost_limit", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_cost_limit)}, ! {"autovacuum_freeze_min_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_min_age)}, ! {"autovacuum_freeze_max_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_max_age)}, ! 
{"autovacuum_freeze_table_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_table_age)}, ! {"autovacuum_multixact_freeze_min_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_min_age)}, ! {"autovacuum_multixact_freeze_max_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_max_age)}, ! {"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_table_age)}, {"log_autovacuum_min_duration", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, log_min_duration)}, diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c new file mode 100644 index 565525b..8f63aea *** a/src/backend/access/gist/gist.c --- b/src/backend/access/gist/gist.c *************** gistdoinsert(Relation r, IndexTuple itup *** 641,646 **** --- 641,647 ---- stack->page = (Page) BufferGetPage(stack->buffer); stack->lsn = PageGetLSN(stack->page); + elog(LOG, "LSN = " UINT64_FORMAT, stack->lsn); Assert(!RelationNeedsWAL(state.r) || !XLogRecPtrIsInvalid(stack->lsn)); /* diff --git a/src/backend/access/hash/hash_xlog.c b/src/backend/access/hash/hash_xlog.c new file mode 100644 index 67a856c..530b8d2 *** a/src/backend/access/hash/hash_xlog.c --- b/src/backend/access/hash/hash_xlog.c *************** *** 18,23 **** --- 18,24 ---- #include "access/bufmask.h" #include "access/hash.h" #include "access/hash_xlog.h" + #include "access/htup_details.h" #include "access/xlogutils.h" #include "access/xlog.h" #include "access/transam.h" *************** hash_xlog_vacuum_get_latestRemovedXid(XL *** 988,994 **** ItemId iitemid, hitemid; IndexTuple itup; - HeapTupleHeader htuphdr; BlockNumber hblkno; OffsetNumber hoffnum; TransactionId latestRemovedXid = InvalidTransactionId; --- 989,994 ---- *************** hash_xlog_vacuum_get_latestRemovedXid(XL *** 1088,1095 **** 
*/ if (ItemIdHasStorage(hitemid)) { ! htuphdr = (HeapTupleHeader) PageGetItem(hpage, hitemid); ! HeapTupleHeaderAdvanceLatestRemovedXid(htuphdr, &latestRemovedXid); } else if (ItemIdIsDead(hitemid)) { --- 1088,1099 ---- */ if (ItemIdHasStorage(hitemid)) { ! HeapTupleData htup; ! ! htup.t_data = (HeapTupleHeader) PageGetItem(hpage, hitemid); ! HeapTupleCopyBaseFromPage(&htup, hpage); ! ! HeapTupleHeaderAdvanceLatestRemovedXid(&htup, &latestRemovedXid); } else if (ItemIdIsDead(hitemid)) { diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c new file mode 100644 index 436a01f..8551300 *** a/src/backend/access/heap/heapam.c --- b/src/backend/access/heap/heapam.c *************** *** 56,61 **** --- 56,62 ---- #include "access/xlogutils.h" #include "catalog/catalog.h" #include "catalog/namespace.h" + #include "commands/vacuum.h" #include "miscadmin.h" #include "pgstat.h" #include "port/atomics.h" *************** static HeapScanDesc heap_beginscan_inter *** 93,99 **** static void heap_parallelscan_startblock_init(HeapScanDesc scan); static BlockNumber heap_parallelscan_nextpage(HeapScanDesc scan); static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, ! TransactionId xid, CommandId cid, int options); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tup, --- 94,100 ---- static void heap_parallelscan_startblock_init(HeapScanDesc scan); static BlockNumber heap_parallelscan_nextpage(HeapScanDesc scan); static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, ! 
CommandId cid, int options); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tup, *************** heapgetpage(HeapScanDesc scan, BlockNumb *** 433,438 **** --- 434,440 ---- loctup.t_tableOid = RelationGetRelid(scan->rs_rd); loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); loctup.t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(&loctup, dp); ItemPointerSet(&(loctup.t_self), page, lineoff); if (all_visible) *************** heapgettup(HeapScanDesc scan, *** 629,634 **** --- 631,637 ---- tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); tuple->t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(tuple, dp); return; } *************** heapgettup(HeapScanDesc scan, *** 648,653 **** --- 651,657 ---- tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); tuple->t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(tuple, dp); ItemPointerSet(&(tuple->t_self), page, lineoff); /* *************** heapgettup_pagemode(HeapScanDesc scan, *** 927,932 **** --- 931,937 ---- tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); tuple->t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(tuple, dp); /* check that rs_cindex is in sync */ Assert(scan->rs_cindex < scan->rs_ntuples); *************** heapgettup_pagemode(HeapScanDesc scan, *** 949,954 **** --- 954,960 ---- tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); tuple->t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(tuple, dp); ItemPointerSet(&(tuple->t_self), page, lineoff); /* *************** heap_fetch(Relation relation, *** 1946,1951 **** --- 1952,1958 ---- tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(tuple, page); /* * check time qualification of tuple, then release lock *************** heap_hot_search_buffer(ItemPointer tid, *** 
2031,2036 **** --- 2038,2044 ---- skip = !first_call; heapTuple->t_self = *tid; + HeapTupleCopyBaseFromPage(heapTuple, dp); /* Scan through possible multiple members of HOT-chain */ for (;;) *************** heap_hot_search_buffer(ItemPointer tid, *** 2061,2066 **** --- 2069,2075 ---- heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp); heapTuple->t_len = ItemIdGetLength(lp); heapTuple->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(heapTuple, dp); ItemPointerSetOffsetNumber(&heapTuple->t_self, offnum); /* *************** heap_hot_search_buffer(ItemPointer tid, *** 2075,2081 **** */ if (TransactionIdIsValid(prev_xmax) && !TransactionIdEquals(prev_xmax, ! HeapTupleHeaderGetXmin(heapTuple->t_data))) break; /* --- 2084,2090 ---- */ if (TransactionIdIsValid(prev_xmax) && !TransactionIdEquals(prev_xmax, ! HeapTupleGetXmin(heapTuple))) break; /* *************** heap_hot_search_buffer(ItemPointer tid, *** 2136,2142 **** ItemPointerGetBlockNumber(tid)); offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid); at_chain_start = false; ! prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data); } else break; /* end of chain */ --- 2145,2151 ---- ItemPointerGetBlockNumber(tid)); offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid); at_chain_start = false; ! prev_xmax = HeapTupleGetUpdateXidAny(heapTuple); } else break; /* end of chain */ *************** heap_get_latest_tid(Relation relation, *** 2255,2267 **** tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_tableOid = RelationGetRelid(relation); /* * After following a t_ctid link, we might arrive at an unrelated * tuple. Check for XMIN match. */ if (TransactionIdIsValid(priorXmax) && ! 
!TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data))) { UnlockReleaseBuffer(buffer); break; --- 2264,2277 ---- tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(&tp, page); /* * After following a t_ctid link, we might arrive at an unrelated * tuple. Check for XMIN match. */ if (TransactionIdIsValid(priorXmax) && ! !TransactionIdEquals(priorXmax, HeapTupleGetXmin(&tp))) { UnlockReleaseBuffer(buffer); break; *************** heap_get_latest_tid(Relation relation, *** 2280,2286 **** * If there's a valid t_ctid link, follow it, else we're done. */ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) || ! HeapTupleHeaderIsOnlyLocked(tp.t_data) || ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid)) { UnlockReleaseBuffer(buffer); --- 2290,2296 ---- * If there's a valid t_ctid link, follow it, else we're done. */ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) || ! HeapTupleHeaderIsOnlyLocked(&tp) || ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid)) { UnlockReleaseBuffer(buffer); *************** heap_get_latest_tid(Relation relation, *** 2288,2294 **** } ctid = tp.t_data->t_ctid; ! priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data); UnlockReleaseBuffer(buffer); } /* end of loop */ } --- 2298,2304 ---- } ctid = tp.t_data->t_ctid; ! priorXmax = HeapTupleGetUpdateXidAny(&tp); UnlockReleaseBuffer(buffer); } /* end of loop */ } *************** heap_get_latest_tid(Relation relation, *** 2313,2319 **** static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid) { ! Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple), xid)); Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI)); if (!(tuple->t_infomask & (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID))) --- 2323,2329 ---- static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid) { ! 
Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(BufferGetPage(buffer), tuple), xid)); Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI)); if (!(tuple->t_infomask & (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID))) *************** heap_insert(Relation relation, HeapTuple *** 2426,2432 **** * Note: below this point, heaptup is the data we actually intend to store * into the relation; tup is the caller's original untoasted data. */ ! heaptup = heap_prepare_insert(relation, tup, xid, cid, options); /* * Find buffer to insert this tuple into. If the page is all visible, --- 2436,2442 ---- * Note: below this point, heaptup is the data we actually intend to store * into the relation; tup is the caller's original untoasted data. */ ! heaptup = heap_prepare_insert(relation, tup, cid, options); /* * Find buffer to insert this tuple into. If the page is all visible, *************** heap_insert(Relation relation, HeapTuple *** 2453,2463 **** */ CheckForSerializableConflictIn(relation, NULL, InvalidBuffer); /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); RelationPutHeapTuple(relation, buffer, heaptup, ! (options & HEAP_INSERT_SPECULATIVE) != 0); if (PageIsAllVisible(BufferGetPage(buffer))) { --- 2463,2476 ---- */ CheckForSerializableConflictIn(relation, NULL, InvalidBuffer); + heap_page_prepare_for_xid(relation, buffer, xid, false); + HeapTupleCopyBaseFromPage(heaptup, BufferGetPage(buffer)); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); RelationPutHeapTuple(relation, buffer, heaptup, ! 
(options & HEAP_INSERT_SPECULATIVE) != 0, xid); if (PageIsAllVisible(BufferGetPage(buffer))) { *************** heap_insert(Relation relation, HeapTuple *** 2530,2535 **** --- 2543,2552 ---- } XLogBeginInsert(); + if (info & XLOG_HEAP_INIT_PAGE) + { + XLogRegisterData((char *) &HeapPageGetSpecial(page)->pd_xid_base, sizeof(TransactionId)); + } XLogRegisterData((char *) &xlrec, SizeOfHeapInsert); xlhdr.t_infomask2 = heaptup->t_data->t_infomask2; *************** heap_insert(Relation relation, HeapTuple *** 2587,2592 **** --- 2604,3035 ---- } /* + * Find minimum and maximum short transaction ids which occurs in the page. + */ + static bool + heap_page_xid_min_max(Page page, bool multi, + ShortTransactionId *min, + ShortTransactionId *max) + { + bool found = false; + OffsetNumber offnum, + maxoff; + + maxoff = PageGetMaxOffsetNumber(page); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + HeapTupleHeader htup; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + if (!multi) + { + if (!HeapTupleHeaderXminFrozen(htup) && + TransactionIdIsNormal(htup->t_choice.t_heap.t_xmin)) + { + if (!found) + { + *min = *max = htup->t_choice.t_heap.t_xmin; + found = true; + } + else + { + *min = Min(*min, htup->t_choice.t_heap.t_xmin); + *max = Max(*max, htup->t_choice.t_heap.t_xmin); + } + } + + if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax) && + !(htup->t_infomask & HEAP_XMAX_IS_MULTI)) + { + if (!found) + { + *min = *max = htup->t_choice.t_heap.t_xmax; + found = true; + } + else + { + *min = Min(*min, htup->t_choice.t_heap.t_xmax); + *max = Max(*max, htup->t_choice.t_heap.t_xmax); + } + } + } + else + { + if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax) && + (htup->t_infomask & HEAP_XMAX_IS_MULTI)) + { + if (!found) + { + *min = *max = htup->t_choice.t_heap.t_xmax; + found = true; + } + else + { + *min = 
Min(*min, htup->t_choice.t_heap.t_xmax); + *max = Max(*max, htup->t_choice.t_heap.t_xmax); + } + } + } + } + return found; + } + + /* + * Shift xid base in the page. WAL-logged if buffer is specified. + */ + static void + heap_page_shift_base(Buffer buffer, Page page, bool multi, int64 delta) + { + HeapPageSpecial pageSpecial = HeapPageGetSpecial(page); + OffsetNumber offnum, + maxoff; + + /* Iterate over page items */ + maxoff = PageGetMaxOffsetNumber(page); + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + HeapTupleHeader htup; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + /* Apply xid shift to heap tuple */ + if (!multi) + { + if (!HeapTupleHeaderXminFrozen(htup) && + TransactionIdIsNormal(htup->t_choice.t_heap.t_xmin)) + { + Assert(htup->t_choice.t_heap.t_xmin - delta >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmin - delta <= MaxShortTransactionId); + htup->t_choice.t_heap.t_xmin -= delta; + } + + if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax) && + !(htup->t_infomask & HEAP_XMAX_IS_MULTI)) + { + Assert(htup->t_choice.t_heap.t_xmax - delta >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmax - delta <= MaxShortTransactionId); + htup->t_choice.t_heap.t_xmax -= delta; + } + } + else + { + if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax) && + (htup->t_infomask & HEAP_XMAX_IS_MULTI)) + { + Assert(htup->t_choice.t_heap.t_xmax - delta >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmax - delta <= MaxShortTransactionId); + htup->t_choice.t_heap.t_xmax -= delta; + } + } + } + + /* Apply xid shift to base as well */ + if (!multi) + pageSpecial->pd_xid_base += delta; + else + pageSpecial->pd_multi_base += delta; + + /* Write WAL record if needed */ + if (BufferIsValid(buffer)) + { + XLogRecPtr recptr; + xl_heap_base_shift xlrec; + + 
xlrec.multi = multi; + xlrec.delta = delta; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHeapBaseShift); + + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + + recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_BASE_SHIFT); + + PageSetLSN(page, recptr); + } + } + + /* + * Freeze xids in the single heap page. Useful when we can't fit new xid even + * with base shift. + */ + static int + freeze_single_heap_page(Relation relation, Buffer buffer) + { + Page page = BufferGetPage(buffer); + OffsetNumber offnum, + maxoff; + HeapTupleData tuple; + int nfrozen = 0; + xl_heap_freeze_tuple *frozen; + TransactionId latestRemovedXid = InvalidTransactionId, + OldestXmin, + FreezeXid; + MultiXactId MultiXactCutoff; + + vacuum_set_xid_limits(relation, 0, 0, 0, 0, + &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff, + NULL); + + heap_page_prune(relation, buffer, OldestXmin, false, &latestRemovedXid, false); + + /* + * Now scan the page to collect vacuumable items and check for tuples + * requiring freezing. + */ + maxoff = PageGetMaxOffsetNumber(page); + frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage); + + /* + * Note: If you change anything in the loop below, also look at + * heap_page_is_all_visible to see if that needs to be changed. + */ + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + bool tuple_totally_frozen; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + Assert(ItemIdIsNormal(itemid)); + + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(&tuple, page); + + /* + * Each non-removable tuple must be checked to see if it needs + * freezing. Note we already have exclusive buffer lock. 
+ */ + if (heap_prepare_freeze_tuple(&tuple, FreezeXid, + MultiXactCutoff, &frozen[nfrozen], + &tuple_totally_frozen)) + frozen[nfrozen++].offset = offnum; + } /* scan along page */ + + /* + * If any tuples need freezing, mark the buffer dirty, apply the freezes and write a WAL + * record recording the changes. We must log the changes to be + * crash-safe against future truncation of CLOG. + */ + if (nfrozen > 0) + { + int i; + + START_CRIT_SECTION(); + + MarkBufferDirty(buffer); + + /* execute collected freezes */ + for (i = 0; i < nfrozen; i++) + { + ItemId itemid; + HeapTupleHeader htup; + + itemid = PageGetItemId(page, frozen[i].offset); + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + heap_execute_freeze_tuple_page(page, htup, &frozen[i]); + } + + /* Now WAL-log freezing if necessary */ + if (RelationNeedsWAL(relation)) + { + XLogRecPtr recptr; + + recptr = log_heap_freeze(relation, buffer, FreezeXid, + frozen, nfrozen); + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + } + + return nfrozen; + } + + /* + * Ensure that given xid (multixact id if multi is set) fits base of given page: shift the base if the xids already stored allow it, otherwise freeze the page once and retry. Returns true only when the base was shifted while heap_page_xid_min_max() found xids on the page; raises ERROR if xid still does not fit. + */ + bool + heap_page_prepare_for_xid(Relation relation, Buffer buffer, + TransactionId xid, bool multi) + { + Page page = BufferGetPage(buffer); + HeapPageSpecial pageSpecial = HeapPageGetSpecial(page); + TransactionId base; + bool found; + ShortTransactionId min, + max; + int i; + + if (!TransactionIdIsNormal(xid)) + return false; + + for (i = 0; i < 2; i++) /* the second pass runs after the page has been frozen */ + { + if (!multi) + base = pageSpecial->pd_xid_base; + else + base = pageSpecial->pd_multi_base; + + /* Can we already store this xid? */ + if (xid >= base + FirstNormalTransactionId && xid <= base + MaxShortTransactionId) + return false; + + /* Find minimum and maximum xids in the page */ + found = heap_page_xid_min_max(page, multi, &min, &max); + + /* No items on the page? 
*/ + if (!found) + { + int64 delta; /* moves base so that xid maps to FirstNormalTransactionId */ + + if (!multi) + delta = (xid - FirstNormalTransactionId) - pageSpecial->pd_xid_base; + else + delta = (xid - FirstNormalTransactionId) - pageSpecial->pd_multi_base; + + heap_page_shift_base(RelationNeedsWAL(relation) ? buffer : InvalidBuffer, + page, multi, delta); + MarkBufferDirty(buffer); + return false; + } + + /* Can we just shift base on the page? */ + if (xid < base + FirstNormalTransactionId) /* xid is below the window: move base down, by the midpoint of the required and the available shift */ + { + int64 freeDelta = MaxShortTransactionId - max, + requiredDelta = (base + FirstNormalTransactionId) - xid; + + if (requiredDelta <= freeDelta) + { + heap_page_shift_base(RelationNeedsWAL(relation) ? buffer : InvalidBuffer, + page, multi, - (freeDelta + requiredDelta) / 2); + MarkBufferDirty(buffer); + return true; + } + } + else /* xid is above the window: move base up likewise */ + { + int64 freeDelta = min - FirstNormalTransactionId, + requiredDelta = xid - (base + MaxShortTransactionId); + + if (requiredDelta <= freeDelta) + { + heap_page_shift_base(RelationNeedsWAL(relation) ? buffer : InvalidBuffer, + page, multi, (freeDelta + requiredDelta) / 2); + MarkBufferDirty(buffer); + return true; + } + } + + if (i == 1) /* already retried after freezing: give up */ + { + break; + } + + /* Have to try freezing the page... */ + freeze_single_heap_page(relation, buffer); + } + + elog(ERROR, "Can't fit xid into page."); + return false; + } + + /* + * Ensure that given xid fits base of given page. Variant of heap_page_prepare_for_xid() that takes a bare page: it never writes WAL and never tries freezing. + */ + bool + rewrite_page_prepare_for_xid(Page page, TransactionId xid, bool multi) + { + HeapPageSpecial pageSpecial = HeapPageGetSpecial(page); + TransactionId base; + bool found; + ShortTransactionId min, + max; + + if (!TransactionIdIsNormal(xid)) + return false; + + if (!multi) + base = pageSpecial->pd_xid_base; + else + base = pageSpecial->pd_multi_base; + + /* Can we already store this xid? */ + if (xid >= base + FirstNormalTransactionId && xid <= base + MaxShortTransactionId) + return false; + + /* Find minimum and maximum xids in the page */ + found = heap_page_xid_min_max(page, multi, &min, &max); + + /* No items on the page? 
*/ + if (!found) /* no stored xids to preserve: place base so that xid maps to FirstNormalTransactionId */ + { + if (!multi) + pageSpecial->pd_xid_base = xid - FirstNormalTransactionId; + else + pageSpecial->pd_multi_base = xid - FirstNormalTransactionId; + return false; + } + + /* Can we just shift base on the page? */ + if (xid < base + FirstNormalTransactionId) + { + int64 freeDelta = MaxShortTransactionId - max, + requiredDelta = (base + FirstNormalTransactionId) - xid; + + if (requiredDelta <= freeDelta) + { + heap_page_shift_base(InvalidBuffer, /* invalid buffer: no WAL record is written */ + page, multi, - (freeDelta + requiredDelta) / 2); + return true; + } + } + else + { + int64 freeDelta = min - FirstNormalTransactionId, + requiredDelta = xid - (base + MaxShortTransactionId); + + if (requiredDelta <= freeDelta) + { + heap_page_shift_base(InvalidBuffer, /* invalid buffer: no WAL record is written */ + page, multi, (freeDelta + requiredDelta) / 2); + return true; + } + } + + elog(ERROR, "Can't fit xid into page."); + return false; /* presumably unreachable after elog(ERROR); keeps the compiler quiet */ + } + + + /* * Subroutine for heap_insert(). Prepares a tuple for insertion. This sets the * tuple header fields, assigns an OID, and toasts the tuple if necessary. * Returns a toasted version of the tuple if it was toasted, or the original *************** heap_insert(Relation relation, HeapTuple *** 2594,2600 **** * the original tuple. */ static HeapTuple ! heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, CommandId cid, int options) { /* --- 3037,3043 ---- * the original tuple. */ static HeapTuple ! heap_prepare_insert(Relation relation, HeapTuple tup, CommandId cid, int options) { /* *************** heap_prepare_insert(Relation relation, H *** 2635,2646 **** tup->t_data->t_infomask &= ~(HEAP_XACT_MASK); tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK); tup->t_data->t_infomask |= HEAP_XMAX_INVALID; ! HeapTupleHeaderSetXmin(tup->t_data, xid); if (options & HEAP_INSERT_FROZEN) HeapTupleHeaderSetXminFrozen(tup->t_data); HeapTupleHeaderSetCmin(tup->t_data, cid); ! 
HeapTupleHeaderSetXmax(tup->t_data, 0); /* for cleanliness */ tup->t_tableOid = RelationGetRelid(relation); /* --- 3078,3089 ---- tup->t_data->t_infomask &= ~(HEAP_XACT_MASK); tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK); tup->t_data->t_infomask |= HEAP_XMAX_INVALID; ! tup->t_data->t_choice.t_heap.t_xmin = InvalidTransactionId; if (options & HEAP_INSERT_FROZEN) HeapTupleHeaderSetXminFrozen(tup->t_data); HeapTupleHeaderSetCmin(tup->t_data, cid); ! HeapTupleSetXmax(tup, 0); /* for cleanliness */ tup->t_tableOid = RelationGetRelid(relation); /* *************** heap_multi_insert(Relation relation, Hea *** 2693,2700 **** /* Toast and set header data in all the tuples */ heaptuples = palloc(ntuples * sizeof(HeapTuple)); for (i = 0; i < ntuples; i++) ! heaptuples[i] = heap_prepare_insert(relation, tuples[i], ! xid, cid, options); /* * Allocate some memory to use for constructing the WAL record. Using --- 3136,3142 ---- /* Toast and set header data in all the tuples */ heaptuples = palloc(ntuples * sizeof(HeapTuple)); for (i = 0; i < ntuples; i++) ! heaptuples[i] = heap_prepare_insert(relation, tuples[i], cid, options); /* * Allocate some memory to use for constructing the WAL record. Using *************** heap_multi_insert(Relation relation, Hea *** 2748,2753 **** --- 3190,3197 ---- &vmbuffer, NULL); page = BufferGetPage(buffer); + heap_page_prepare_for_xid(relation, buffer, xid, false); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); *************** heap_multi_insert(Relation relation, Hea *** 2755,2761 **** * RelationGetBufferForTuple has ensured that the first tuple fits. * Put that on the page, and then as many other tuples as fit. */ ! RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false); for (nthispage = 1; ndone + nthispage < ntuples; nthispage++) { HeapTuple heaptup = heaptuples[ndone + nthispage]; --- 3199,3206 ---- * RelationGetBufferForTuple has ensured that the first tuple fits. 
* Put that on the page, and then as many other tuples as fit. */ ! HeapTupleCopyBaseFromPage(heaptuples[ndone], BufferGetPage(buffer)); ! RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false, xid); for (nthispage = 1; ndone + nthispage < ntuples; nthispage++) { HeapTuple heaptup = heaptuples[ndone + nthispage]; *************** heap_multi_insert(Relation relation, Hea *** 2763,2769 **** if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace) break; ! RelationPutHeapTuple(relation, buffer, heaptup, false); /* * We don't use heap_multi_insert for catalog tuples yet, but --- 3208,3215 ---- if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace) break; ! HeapTupleCopyBaseFromPage(heaptup, BufferGetPage(buffer)); ! RelationPutHeapTuple(relation, buffer, heaptup, false, xid); /* * We don't use heap_multi_insert for catalog tuples yet, but *************** heap_multi_insert(Relation relation, Hea *** 2882,2887 **** --- 3328,3337 ---- bufflags |= REGBUF_KEEP_DATA; XLogBeginInsert(); + if (info & XLOG_HEAP_INIT_PAGE) + { + XLogRegisterData((char *) &HeapPageGetSpecial(page)->pd_xid_base, sizeof(TransactionId)); + } XLogRegisterData((char *) xlrec, tupledata - scratch); XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags); *************** heap_delete(Relation relation, ItemPoint *** 3093,3098 **** --- 3543,3549 ---- tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_self = *tid; + HeapTupleCopyBaseFromPage(&tp, page); l1: result = HeapTupleSatisfiesUpdate(&tp, cid, buffer); *************** l1: *** 3110,3116 **** uint16 infomask; /* must copy state data before unlocking buffer */ ! xwait = HeapTupleHeaderGetRawXmax(tp.t_data); infomask = tp.t_data->t_infomask; /* --- 3561,3567 ---- uint16 infomask; /* must copy state data before unlocking buffer */ ! 
xwait = HeapTupleGetRawXmax(&tp); infomask = tp.t_data->t_infomask; /* *************** l1: *** 3150,3156 **** * for xmax change, and start over if so. */ if (xmax_infomask_changed(tp.t_data->t_infomask, infomask) || ! !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data), xwait)) goto l1; } --- 3601,3607 ---- * for xmax change, and start over if so. */ if (xmax_infomask_changed(tp.t_data->t_infomask, infomask) || ! !TransactionIdEquals(HeapTupleGetRawXmax(&tp), xwait)) goto l1; } *************** l1: *** 3183,3189 **** * Check for xmax change, and start over if so. */ if (xmax_infomask_changed(tp.t_data->t_infomask, infomask) || ! !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data), xwait)) goto l1; --- 3634,3640 ---- * Check for xmax change, and start over if so. */ if (xmax_infomask_changed(tp.t_data->t_infomask, infomask) || ! !TransactionIdEquals(HeapTupleGetRawXmax(&tp), xwait)) goto l1; *************** l1: *** 3197,3203 **** */ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) || HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask) || ! HeapTupleHeaderIsOnlyLocked(tp.t_data)) result = HeapTupleMayBeUpdated; else result = HeapTupleUpdated; --- 3648,3654 ---- */ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) || HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask) || ! HeapTupleHeaderIsOnlyLocked(&tp)) result = HeapTupleMayBeUpdated; else result = HeapTupleUpdated; *************** l1: *** 3217,3223 **** result == HeapTupleBeingUpdated); Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID)); hufd->ctid = tp.t_data->t_ctid; ! hufd->xmax = HeapTupleHeaderGetUpdateXid(tp.t_data); if (result == HeapTupleSelfUpdated) hufd->cmax = HeapTupleHeaderGetCmax(tp.t_data); else --- 3668,3674 ---- result == HeapTupleBeingUpdated); Assert(!(tp.t_data->t_infomask & HEAP_XMAX_INVALID)); hufd->ctid = tp.t_data->t_ctid; ! 
hufd->xmax = HeapTupleGetUpdateXidAny(&tp); if (result == HeapTupleSelfUpdated) hufd->cmax = HeapTupleHeaderGetCmax(tp.t_data); else *************** l1: *** 3242,3248 **** CheckForSerializableConflictIn(relation, &tp, buffer); /* replace cid with a combo cid if necessary */ ! HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo); /* * Compute replica identity tuple before entering the critical section so --- 3693,3699 ---- CheckForSerializableConflictIn(relation, &tp, buffer); /* replace cid with a combo cid if necessary */ ! HeapTupleHeaderAdjustCmax(&tp, &cid, &iscombo); /* * Compute replica identity tuple before entering the critical section so *************** l1: *** 3260,3270 **** */ MultiXactIdSetOldestMember(); ! compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(tp.t_data), tp.t_data->t_infomask, tp.t_data->t_infomask2, xid, LockTupleExclusive, true, &new_xmax, &new_infomask, &new_infomask2); START_CRIT_SECTION(); /* --- 3711,3725 ---- */ MultiXactIdSetOldestMember(); ! compute_new_xmax_infomask(HeapTupleGetRawXmax(&tp), tp.t_data->t_infomask, tp.t_data->t_infomask2, xid, LockTupleExclusive, true, &new_xmax, &new_infomask, &new_infomask2); + heap_page_prepare_for_xid(relation, buffer, new_xmax, + (new_infomask & HEAP_XMAX_IS_MULTI) ? true : false); + HeapTupleCopyBaseFromPage(&tp, page); + START_CRIT_SECTION(); /* *************** l1: *** 3290,3296 **** tp.t_data->t_infomask |= new_infomask; tp.t_data->t_infomask2 |= new_infomask2; HeapTupleHeaderClearHotUpdated(tp.t_data); ! HeapTupleHeaderSetXmax(tp.t_data, new_xmax); HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo); /* Make sure there is no forward chain link in t_ctid */ tp.t_data->t_ctid = tp.t_self; --- 3745,3751 ---- tp.t_data->t_infomask |= new_infomask; tp.t_data->t_infomask2 |= new_infomask2; HeapTupleHeaderClearHotUpdated(tp.t_data); ! 
HeapTupleSetXmax(&tp, new_xmax); HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo); /* Make sure there is no forward chain link in t_ctid */ tp.t_data->t_ctid = tp.t_self; *************** heap_update(Relation relation, ItemPoint *** 3492,3498 **** HeapTuple heaptup; HeapTuple old_key_tuple = NULL; bool old_key_copied = false; ! Page page; BlockNumber block; MultiXactStatus mxact_status; Buffer buffer, --- 3947,3954 ---- HeapTuple heaptup; HeapTuple old_key_tuple = NULL; bool old_key_copied = false; ! Page page, ! newpage; BlockNumber block; MultiXactStatus mxact_status; Buffer buffer, *************** heap_update(Relation relation, ItemPoint *** 3598,3606 **** --- 4054,4064 ---- oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp); oldtup.t_len = ItemIdGetLength(lp); oldtup.t_self = *otid; + HeapTupleCopyBaseFromPage(&oldtup, page); /* the new tuple is ready, except for this: */ newtup->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(newtup, page); /* Fill in OID for newtup */ if (relation->rd_rel->relhasoids) *************** l2: *** 3698,3704 **** */ /* must copy state data before unlocking buffer */ ! xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data); infomask = oldtup.t_data->t_infomask; /* --- 4156,4162 ---- */ /* must copy state data before unlocking buffer */ ! xwait = HeapTupleGetRawXmax(&oldtup); infomask = oldtup.t_data->t_infomask; /* *************** l2: *** 3744,3749 **** --- 4202,4208 ---- checked_lockers = true; locker_remains = remain != 0; LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(&oldtup, page); /* * If xwait had just locked the tuple then some other xact *************** l2: *** 3752,3758 **** */ if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) || ! !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data), xwait)) goto l2; } --- 4211,4217 ---- */ if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) || ! 
!TransactionIdEquals(HeapTupleGetRawXmax(&oldtup), xwait)) goto l2; } *************** l2: *** 3777,3783 **** * subxact aborts. */ if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask)) ! update_xact = HeapTupleGetUpdateXid(oldtup.t_data); else update_xact = InvalidTransactionId; --- 4236,4242 ---- * subxact aborts. */ if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask)) ! update_xact = HeapTupleGetUpdateXid(&oldtup); else update_xact = InvalidTransactionId; *************** l2: *** 3825,3830 **** --- 4284,4291 ---- checked_lockers = true; LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(&oldtup, page); + /* * xwait is done, but if xwait had just locked the tuple then some * other xact could update this tuple before we get to this point. *************** l2: *** 3832,3838 **** */ if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) || !TransactionIdEquals(xwait, ! HeapTupleHeaderGetRawXmax(oldtup.t_data))) goto l2; /* Otherwise check if it committed or aborted */ --- 4293,4299 ---- */ if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) || !TransactionIdEquals(xwait, ! HeapTupleGetRawXmax(&oldtup))) goto l2; /* Otherwise check if it committed or aborted */ *************** l2: *** 3858,3864 **** result == HeapTupleBeingUpdated); Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)); hufd->ctid = oldtup.t_data->t_ctid; ! hufd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data); if (result == HeapTupleSelfUpdated) hufd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data); else --- 4319,4325 ---- result == HeapTupleBeingUpdated); Assert(!(oldtup.t_data->t_infomask & HEAP_XMAX_INVALID)); hufd->ctid = oldtup.t_data->t_ctid; ! 
hufd->xmax = HeapTupleGetUpdateXidAny(&oldtup); if (result == HeapTupleSelfUpdated) hufd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data); else *************** l2: *** 3890,3895 **** --- 4351,4357 ---- LockBuffer(buffer, BUFFER_LOCK_UNLOCK); visibilitymap_pin(relation, block, &vmbuffer); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(&oldtup, page); goto l2; } *************** l2: *** 3899,3905 **** * If the tuple we're updating is locked, we need to preserve the locking * info in the old tuple's Xmax. Prepare a new Xmax value for this. */ ! compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data), oldtup.t_data->t_infomask, oldtup.t_data->t_infomask2, xid, *lockmode, true, --- 4361,4367 ---- * If the tuple we're updating is locked, we need to preserve the locking * info in the old tuple's Xmax. Prepare a new Xmax value for this. */ ! compute_new_xmax_infomask(HeapTupleGetRawXmax(&oldtup), oldtup.t_data->t_infomask, oldtup.t_data->t_infomask2, xid, *lockmode, true, *************** l2: *** 3918,3924 **** (checked_lockers && !locker_remains)) xmax_new_tuple = InvalidTransactionId; else ! xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data); if (!TransactionIdIsValid(xmax_new_tuple)) { --- 4380,4386 ---- (checked_lockers && !locker_remains)) xmax_new_tuple = InvalidTransactionId; else ! xmax_new_tuple = HeapTupleGetRawXmax(&oldtup); if (!TransactionIdIsValid(xmax_new_tuple)) { *************** l2: *** 3951,3967 **** */ newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK); newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK); - HeapTupleHeaderSetXmin(newtup->t_data, xid); HeapTupleHeaderSetCmin(newtup->t_data, cid); newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple; newtup->t_data->t_infomask2 |= infomask2_new_tuple; ! HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple); /* * Replace cid with a combo cid if necessary. Note that we already put * the plain cid into the new tuple. */ ! 
HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo); /* * If the toaster needs to be activated, OR if the new tuple will not fit --- 4413,4435 ---- */ newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK); newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK); HeapTupleHeaderSetCmin(newtup->t_data, cid); newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple; newtup->t_data->t_infomask2 |= infomask2_new_tuple; ! ! heap_page_prepare_for_xid(relation, buffer, xid, false); ! HeapTupleCopyBaseFromPage(newtup, page); ! HeapTupleSetXmin(newtup, xid); ! heap_page_prepare_for_xid(relation, buffer, xmax_new_tuple, ! (newtup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ? true : false); ! HeapTupleCopyBaseFromPage(newtup, page); ! HeapTupleSetXmax(newtup, xmax_new_tuple); /* * Replace cid with a combo cid if necessary. Note that we already put * the plain cid into the new tuple. */ ! HeapTupleHeaderAdjustCmax(&oldtup, &cid, &iscombo); /* * If the toaster needs to be activated, OR if the new tuple will not fit *************** l2: *** 4016,4022 **** * updating, because the potentially created multixact would otherwise * be wrong. */ ! compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data), oldtup.t_data->t_infomask, oldtup.t_data->t_infomask2, xid, *lockmode, false, --- 4484,4490 ---- * updating, because the potentially created multixact would otherwise * be wrong. */ ! compute_new_xmax_infomask(HeapTupleGetRawXmax(&oldtup), oldtup.t_data->t_infomask, oldtup.t_data->t_infomask2, xid, *lockmode, false, *************** l2: *** 4025,4030 **** --- 4493,4502 ---- Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple)); + heap_page_prepare_for_xid(relation, buffer, xmax_lock_old_tuple, + (infomask_lock_old_tuple & HEAP_XMAX_IS_MULTI) ? true : false); + HeapTupleCopyBaseFromPage(&oldtup, page); + START_CRIT_SECTION(); /* Clear obsolete visibility flags ... */ *************** l2: *** 4033,4041 **** HeapTupleClearHotUpdated(&oldtup); /* ... 
and store info about transaction updating this tuple */ Assert(TransactionIdIsValid(xmax_lock_old_tuple)); - HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple); oldtup.t_data->t_infomask |= infomask_lock_old_tuple; oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple; HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); /* temporarily make it look not-updated, but locked */ --- 4505,4513 ---- HeapTupleClearHotUpdated(&oldtup); /* ... and store info about transaction updating this tuple */ Assert(TransactionIdIsValid(xmax_lock_old_tuple)); oldtup.t_data->t_infomask |= infomask_lock_old_tuple; oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple; + HeapTupleSetXmax(&oldtup, xmax_lock_old_tuple); HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); /* temporarily make it look not-updated, but locked */ *************** l2: *** 4198,4203 **** --- 4670,4681 ---- bms_overlap(modified_attrs, id_attrs), &old_key_copied); + heap_page_prepare_for_xid(relation, newbuf, xid, false); + HeapTupleCopyBaseFromPage(heaptup, page); + heap_page_prepare_for_xid(relation, buffer, xmax_old_tuple, + (infomask_old_tuple & HEAP_XMAX_IS_MULTI) ? true : false); + HeapTupleCopyBaseFromPage(&oldtup, page); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); *************** l2: *** 4232,4248 **** HeapTupleClearHeapOnly(newtup); } ! RelationPutHeapTuple(relation, newbuf, heaptup, false); /* insert new tuple */ /* Clear obsolete visibility flags, possibly set by ourselves above... */ oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; /* ... 
and store info about transaction updating this tuple */ Assert(TransactionIdIsValid(xmax_old_tuple)); - HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple); oldtup.t_data->t_infomask |= infomask_old_tuple; oldtup.t_data->t_infomask2 |= infomask2_old_tuple; HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); /* record address of new tuple in t_ctid of old one */ --- 4710,4739 ---- HeapTupleClearHeapOnly(newtup); } ! newpage = BufferGetPage(newbuf); + if (newbuf != buffer) + { + /* Prepare new page for xids */ + heap_page_prepare_for_xid(relation, newbuf, xid, false); + HeapTupleCopyBaseFromPage(heaptup, newpage); + HeapTupleSetXmin(heaptup, xid); + heap_page_prepare_for_xid(relation, newbuf, xmax_new_tuple, + (heaptup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ? true : false); + HeapTupleCopyBaseFromPage(heaptup, newpage); + HeapTupleSetXmax(heaptup, xmax_new_tuple); + } + + RelationPutHeapTuple(relation, newbuf, heaptup, false, xid); /* insert new tuple */ /* Clear obsolete visibility flags, possibly set by ourselves above... */ oldtup.t_data->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; /* ... and store info about transaction updating this tuple */ Assert(TransactionIdIsValid(xmax_old_tuple)); oldtup.t_data->t_infomask |= infomask_old_tuple; oldtup.t_data->t_infomask2 |= infomask2_old_tuple; + HeapTupleSetXmax(&oldtup, xmax_old_tuple); HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); /* record address of new tuple in t_ctid of old one */ *************** heap_lock_tuple(Relation relation, HeapT *** 4598,4603 **** --- 5089,5095 ---- tuple->t_tableOid = RelationGetRelid(relation); l3: + HeapTupleCopyBaseFromPage(tuple, page); result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer); if (result == HeapTupleInvisible) *************** l3: *** 4620,4626 **** ItemPointerData t_ctid; /* must copy state data before unlocking buffer */ ! 
xwait = HeapTupleHeaderGetRawXmax(tuple->t_data); infomask = tuple->t_data->t_infomask; infomask2 = tuple->t_data->t_infomask2; ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid); --- 5112,5118 ---- ItemPointerData t_ctid; /* must copy state data before unlocking buffer */ ! xwait = HeapTupleGetRawXmax(tuple); infomask = tuple->t_data->t_infomask; infomask2 = tuple->t_data->t_infomask2; ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid); *************** l3: *** 4776,4782 **** * now need to follow the update chain to lock the new * versions. */ ! if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) && ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) || !updated)) goto l3; --- 5268,5274 ---- * now need to follow the update chain to lock the new * versions. */ ! if (!HeapTupleHeaderIsOnlyLocked(tuple) && ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) || !updated)) goto l3; *************** l3: *** 4832,4838 **** */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || ! !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), xwait)) goto l3; --- 5324,5330 ---- */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || ! !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; *************** l3: *** 4846,4853 **** /* if the xmax changed in the meantime, start over */ if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || ! !TransactionIdEquals( ! HeapTupleHeaderGetRawXmax(tuple->t_data), xwait)) goto l3; /* otherwise, we're good */ --- 5338,5344 ---- /* if the xmax changed in the meantime, start over */ if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || ! !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; /* otherwise, we're good */ *************** l3: *** 4873,4879 **** /* ... 
but if the xmax changed in the meantime, start over */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || ! !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), xwait)) goto l3; Assert(HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask)); --- 5364,5370 ---- /* ... but if the xmax changed in the meantime, start over */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || ! !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; Assert(HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask)); *************** l3: *** 5021,5027 **** * Check for xmax change, and start over if so. */ if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || ! !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), xwait)) goto l3; --- 5512,5518 ---- * Check for xmax change, and start over if so. */ if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || ! !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; *************** l3: *** 5049,5055 **** if (!require_sleep || (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) || HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) || ! HeapTupleHeaderIsOnlyLocked(tuple->t_data)) result = HeapTupleMayBeUpdated; else result = HeapTupleUpdated; --- 5540,5546 ---- if (!require_sleep || (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) || HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) || ! HeapTupleHeaderIsOnlyLocked(tuple)) result = HeapTupleMayBeUpdated; else result = HeapTupleUpdated; *************** failed: *** 5062,5068 **** result == HeapTupleWouldBlock); Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID)); hufd->ctid = tuple->t_data->t_ctid; ! 
hufd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data); if (result == HeapTupleSelfUpdated) hufd->cmax = HeapTupleHeaderGetCmax(tuple->t_data); else --- 5553,5559 ---- result == HeapTupleWouldBlock); Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID)); hufd->ctid = tuple->t_data->t_ctid; ! hufd->xmax = HeapTupleGetUpdateXidAny(tuple); if (result == HeapTupleSelfUpdated) hufd->cmax = HeapTupleHeaderGetCmax(tuple->t_data); else *************** failed: *** 5087,5093 **** goto l3; } ! xmax = HeapTupleHeaderGetRawXmax(tuple->t_data); old_infomask = tuple->t_data->t_infomask; /* --- 5578,5584 ---- goto l3; } ! xmax = HeapTupleGetRawXmax(tuple); old_infomask = tuple->t_data->t_infomask; /* *************** failed: *** 5109,5114 **** --- 5600,5609 ---- GetCurrentTransactionId(), mode, false, &xid, &new_infomask, &new_infomask2); + heap_page_prepare_for_xid(relation, *buffer, xid, + (new_infomask & HEAP_XMAX_IS_MULTI) ? true : false); + HeapTupleCopyBaseFromPage(tuple, page); + START_CRIT_SECTION(); /* *************** failed: *** 5127,5133 **** tuple->t_data->t_infomask2 |= new_infomask2; if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask)) HeapTupleHeaderClearHotUpdated(tuple->t_data); ! HeapTupleHeaderSetXmax(tuple->t_data, xid); /* * Make sure there is no forward chain link in t_ctid. Note that in the --- 5622,5628 ---- tuple->t_data->t_infomask2 |= new_infomask2; if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask)) HeapTupleHeaderClearHotUpdated(tuple->t_data); ! HeapTupleSetXmax(tuple, xid); /* * Make sure there is no forward chain link in t_ctid. Note that in the *************** l5: *** 5443,5449 **** * TransactionIdIsInProgress() should have returned false. We * assume it's no longer locked in this case. */ ! elog(WARNING, "LOCK_ONLY found for Xid in progress %u", xmax); old_infomask |= HEAP_XMAX_INVALID; old_infomask &= ~HEAP_XMAX_LOCK_ONLY; goto l5; --- 5938,5944 ---- * TransactionIdIsInProgress() should have returned false. We * assume it's no longer locked in this case. */ ! 
elog(WARNING, "LOCK_ONLY found for Xid in progress " XID_FMT, xmax); old_infomask |= HEAP_XMAX_INVALID; old_infomask &= ~HEAP_XMAX_LOCK_ONLY; goto l5; *************** l4: *** 5710,5716 **** * end of the chain, we're done, so return success. */ if (TransactionIdIsValid(priorXmax) && ! !TransactionIdEquals(HeapTupleHeaderGetXmin(mytup.t_data), priorXmax)) { result = HeapTupleMayBeUpdated; --- 6205,6211 ---- * end of the chain, we're done, so return success. */ if (TransactionIdIsValid(priorXmax) && ! !TransactionIdEquals(HeapTupleGetXmin(&mytup), priorXmax)) { result = HeapTupleMayBeUpdated; *************** l4: *** 5722,5728 **** * (sub)transaction, then we already locked the last live one in the * chain, thus we're done, so return success. */ ! if (TransactionIdDidAbort(HeapTupleHeaderGetXmin(mytup.t_data))) { UnlockReleaseBuffer(buf); return HeapTupleMayBeUpdated; --- 6217,6223 ---- * (sub)transaction, then we already locked the last live one in the * chain, thus we're done, so return success. */ ! if (TransactionIdDidAbort(HeapTupleGetXmin(&mytup))) { UnlockReleaseBuffer(buf); return HeapTupleMayBeUpdated; *************** l4: *** 5730,5736 **** old_infomask = mytup.t_data->t_infomask; old_infomask2 = mytup.t_data->t_infomask2; ! xmax = HeapTupleHeaderGetRawXmax(mytup.t_data); /* * If this tuple version has been updated or locked by some concurrent --- 6225,6231 ---- old_infomask = mytup.t_data->t_infomask; old_infomask2 = mytup.t_data->t_infomask2; ! xmax = HeapTupleGetRawXmax(&mytup); /* * If this tuple version has been updated or locked by some concurrent *************** l4: *** 5743,5749 **** TransactionId rawxmax; bool needwait; ! rawxmax = HeapTupleHeaderGetRawXmax(mytup.t_data); if (old_infomask & HEAP_XMAX_IS_MULTI) { int nmembers; --- 6238,6244 ---- TransactionId rawxmax; bool needwait; ! 
rawxmax = HeapTupleGetRawXmax(&mytup); if (old_infomask & HEAP_XMAX_IS_MULTI) { int nmembers; *************** l4: *** 5882,5895 **** VISIBILITYMAP_ALL_FROZEN)) cleared_all_frozen = true; START_CRIT_SECTION(); /* ... and set them */ - HeapTupleHeaderSetXmax(mytup.t_data, new_xmax); mytup.t_data->t_infomask &= ~HEAP_XMAX_BITS; mytup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; mytup.t_data->t_infomask |= new_infomask; mytup.t_data->t_infomask2 |= new_infomask2; MarkBufferDirty(buf); --- 6377,6394 ---- VISIBILITYMAP_ALL_FROZEN)) cleared_all_frozen = true; + heap_page_prepare_for_xid(rel, buf, new_xmax, + (new_infomask & HEAP_XMAX_IS_MULTI) ? true : false); + HeapTupleCopyBaseFromPage(&mytup, BufferGetPage(buf)); + START_CRIT_SECTION(); /* ... and set them */ mytup.t_data->t_infomask &= ~HEAP_XMAX_BITS; mytup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; mytup.t_data->t_infomask |= new_infomask; mytup.t_data->t_infomask2 |= new_infomask2; + HeapTupleSetXmax(&mytup, new_xmax); MarkBufferDirty(buf); *************** next: *** 5922,5935 **** /* if we find the end of update chain, we're done. */ if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID || ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) || ! HeapTupleHeaderIsOnlyLocked(mytup.t_data)) { result = HeapTupleMayBeUpdated; goto out_locked; } /* tail recursion */ ! priorXmax = HeapTupleHeaderGetUpdateXid(mytup.t_data); ItemPointerCopy(&(mytup.t_data->t_ctid), &tupid); UnlockReleaseBuffer(buf); if (vmbuffer != InvalidBuffer) --- 6421,6434 ---- /* if we find the end of update chain, we're done. */ if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID || ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) || ! HeapTupleHeaderIsOnlyLocked(&mytup)) { result = HeapTupleMayBeUpdated; goto out_locked; } /* tail recursion */ ! 
priorXmax = HeapTupleGetUpdateXidAny(&mytup); ItemPointerCopy(&(mytup.t_data->t_ctid), &tupid); UnlockReleaseBuffer(buf); if (vmbuffer != InvalidBuffer) *************** heap_abort_speculative(Relation relation *** 6133,6144 **** tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_self = *tid; /* * Sanity check that the tuple really is a speculatively inserted tuple, * inserted by us. */ ! if (tp.t_data->t_choice.t_heap.t_xmin != xid) elog(ERROR, "attempted to kill a tuple inserted by another transaction"); if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data))) elog(ERROR, "attempted to kill a non-speculative tuple"); --- 6632,6644 ---- tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_self = *tid; + HeapTupleCopyBaseFromPage(&tp, page); /* * Sanity check that the tuple really is a speculatively inserted tuple, * inserted by us. */ ! if (HeapTupleGetRawXmin(&tp) != xid) elog(ERROR, "attempted to kill a tuple inserted by another transaction"); if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data))) elog(ERROR, "attempted to kill a non-speculative tuple"); *************** heap_abort_speculative(Relation relation *** 6170,6176 **** * tuple immediately invisible everyone. (In particular, to any * transactions waiting on the speculative token, woken up later.) */ ! HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId); /* Clear the speculative insertion token too */ tp.t_data->t_ctid = tp.t_self; --- 6670,6676 ---- * tuple immediately invisible everyone. (In particular, to any * transactions waiting on the speculative token, woken up later.) */ ! HeapTupleSetXmin(&tp, InvalidTransactionId); /* Clear the speculative insertion token too */ tp.t_data->t_ctid = tp.t_self; *************** FreezeMultiXactId(MultiXactId multi, uin *** 6618,6639 **** * recovery. We really need to remove old xids. */ bool ! 
heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, bool *totally_frozen_p) { bool changed = false; bool freeze_xmax = false; TransactionId xid; bool totally_frozen = true; frz->frzflags = 0; frz->t_infomask2 = tuple->t_infomask2; frz->t_infomask = tuple->t_infomask; ! frz->xmax = HeapTupleHeaderGetRawXmax(tuple); /* Process xmin */ ! xid = HeapTupleHeaderGetXmin(tuple); if (TransactionIdIsNormal(xid)) { if (TransactionIdPrecedes(xid, cutoff_xid)) --- 7118,7140 ---- * recovery. We really need to remove old xids. */ bool ! heap_prepare_freeze_tuple(HeapTuple htup, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, bool *totally_frozen_p) { bool changed = false; bool freeze_xmax = false; TransactionId xid; + HeapTupleHeader tuple = htup->t_data; bool totally_frozen = true; frz->frzflags = 0; frz->t_infomask2 = tuple->t_infomask2; frz->t_infomask = tuple->t_infomask; ! frz->xmax = HeapTupleGetRawXmax(htup); /* Process xmin */ ! xid = HeapTupleGetXmin(htup); if (TransactionIdIsNormal(xid)) { if (TransactionIdPrecedes(xid, cutoff_xid)) *************** heap_prepare_freeze_tuple(HeapTupleHeade *** 6654,6660 **** * * Make sure to keep heap_tuple_needs_freeze in sync with this. */ ! xid = HeapTupleHeaderGetRawXmax(tuple); if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { --- 7155,7161 ---- * * Make sure to keep heap_tuple_needs_freeze in sync with this. */ ! xid = HeapTupleGetRawXmax(htup); if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { *************** heap_prepare_freeze_tuple(HeapTupleHeade *** 6796,6804 **** * NB: All code in here must be safe to execute during crash recovery! */ void ! heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz) { ! 
HeapTupleHeaderSetXmax(tuple, frz->xmax); if (frz->frzflags & XLH_FREEZE_XVAC) HeapTupleHeaderSetXvac(tuple, FrozenTransactionId); --- 7297,7322 ---- * NB: All code in here must be safe to execute during crash recovery! */ void ! heap_execute_freeze_tuple(HeapTuple htup, xl_heap_freeze_tuple *frz) { ! HeapTupleHeader tuple = htup->t_data; ! ! HeapTupleSetXmax(htup, frz->xmax); ! ! if (frz->frzflags & XLH_FREEZE_XVAC) ! HeapTupleHeaderSetXvac(tuple, FrozenTransactionId); ! ! if (frz->frzflags & XLH_INVALID_XVAC) ! HeapTupleHeaderSetXvac(tuple, InvalidTransactionId); ! ! tuple->t_infomask = frz->t_infomask; ! tuple->t_infomask2 = frz->t_infomask2; ! } ! ! void ! heap_execute_freeze_tuple_page(Page page, HeapTupleHeader tuple, xl_heap_freeze_tuple *frz) ! { ! HeapTupleHeaderSetXmax(page, tuple, frz->xmax); if (frz->frzflags & XLH_FREEZE_XVAC) HeapTupleHeaderSetXvac(tuple, FrozenTransactionId); *************** heap_execute_freeze_tuple(HeapTupleHeade *** 6817,6823 **** * Useful for callers like CLUSTER that perform their own WAL logging. */ bool ! heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid, TransactionId cutoff_multi) { xl_heap_freeze_tuple frz; --- 7335,7341 ---- * Useful for callers like CLUSTER that perform their own WAL logging. */ bool ! heap_freeze_tuple(HeapTuple tuple, TransactionId cutoff_xid, TransactionId cutoff_multi) { xl_heap_freeze_tuple frz; *************** MultiXactIdGetUpdateXid(TransactionId xm *** 6978,6987 **** * checking the hint bits. */ TransactionId ! HeapTupleGetUpdateXid(HeapTupleHeader tuple) { ! return MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(tuple), ! tuple->t_infomask); } /* --- 7496,7505 ---- * checking the hint bits. */ TransactionId ! HeapTupleGetUpdateXid(HeapTuple tuple) { ! return MultiXactIdGetUpdateXid(HeapTupleGetRawXmax(tuple), ! 
tuple->t_data->t_infomask); } /* *************** bool *** 7199,7210 **** heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) { TransactionId xid; /* * If xmin is a normal transaction ID, this tuple is definitely not * frozen. */ ! xid = HeapTupleHeaderGetXmin(tuple); if (TransactionIdIsNormal(xid)) return true; --- 7717,7732 ---- heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) { TransactionId xid; + HeapTupleData htup; + + htup.t_data = tuple; + HeapTupleSetZeroBase(&htup); /* * If xmin is a normal transaction ID, this tuple is definitely not * frozen. */ ! xid = HeapTupleGetXmin(&htup); if (TransactionIdIsNormal(xid)) return true; *************** heap_tuple_needs_eventual_freeze(HeapTup *** 7215,7227 **** { MultiXactId multi; ! multi = HeapTupleHeaderGetRawXmax(tuple); if (MultiXactIdIsValid(multi)) return true; } else { ! xid = HeapTupleHeaderGetRawXmax(tuple); if (TransactionIdIsNormal(xid)) return true; } --- 7737,7749 ---- { MultiXactId multi; ! multi = HeapTupleGetRawXmax(&htup); if (MultiXactIdIsValid(multi)) return true; } else { ! xid = HeapTupleGetRawXmax(&htup); if (TransactionIdIsNormal(xid)) return true; } *************** heap_tuple_needs_eventual_freeze(HeapTup *** 7249,7260 **** * on a standby. */ bool ! heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, MultiXactId cutoff_multi, Buffer buf) { TransactionId xid; ! xid = HeapTupleHeaderGetXmin(tuple); if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, cutoff_xid)) return true; --- 7771,7783 ---- * on a standby. */ bool ! heap_tuple_needs_freeze(HeapTuple htup, TransactionId cutoff_xid, MultiXactId cutoff_multi, Buffer buf) { TransactionId xid; + HeapTupleHeader tuple = htup->t_data; ! xid = HeapTupleGetXmin(htup); if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, cutoff_xid)) return true; *************** heap_tuple_needs_freeze(HeapTupleHeader *** 7268,7274 **** { MultiXactId multi; ! 
multi = HeapTupleHeaderGetRawXmax(tuple); if (!MultiXactIdIsValid(multi)) { /* no xmax set, ignore */ --- 7791,7797 ---- { MultiXactId multi; ! multi = HeapTupleGetRawXmax(htup); if (!MultiXactIdIsValid(multi)) { /* no xmax set, ignore */ *************** heap_tuple_needs_freeze(HeapTupleHeader *** 7303,7309 **** } else { ! xid = HeapTupleHeaderGetRawXmax(tuple); if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, cutoff_xid)) return true; --- 7826,7832 ---- } else { ! xid = HeapTupleGetRawXmax(htup); if (TransactionIdIsNormal(xid) && TransactionIdPrecedes(xid, cutoff_xid)) return true; *************** heap_tuple_needs_freeze(HeapTupleHeader *** 7328,7341 **** * with queries. */ void ! HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, TransactionId *latestRemovedXid) { ! TransactionId xmin = HeapTupleHeaderGetXmin(tuple); ! TransactionId xmax = HeapTupleHeaderGetUpdateXid(tuple); ! TransactionId xvac = HeapTupleHeaderGetXvac(tuple); ! if (tuple->t_infomask & HEAP_MOVED) { if (TransactionIdPrecedes(*latestRemovedXid, xvac)) *latestRemovedXid = xvac; --- 7851,7864 ---- * with queries. */ void ! HeapTupleHeaderAdvanceLatestRemovedXid(HeapTuple tuple, TransactionId *latestRemovedXid) { ! TransactionId xmin = HeapTupleGetXmin(tuple); ! TransactionId xmax = HeapTupleGetUpdateXidAny(tuple); ! TransactionId xvac = HeapTupleHeaderGetXvac(tuple->t_data); ! if (tuple->t_data->t_infomask & HEAP_MOVED) { if (TransactionIdPrecedes(*latestRemovedXid, xvac)) *latestRemovedXid = xvac; *************** HeapTupleHeaderAdvanceLatestRemovedXid(H *** 7349,7356 **** * This needs to work on both master and standby, where it is used to * assess btree delete records. */ ! if (HeapTupleHeaderXminCommitted(tuple) || ! (!HeapTupleHeaderXminInvalid(tuple) && TransactionIdDidCommit(xmin))) { if (xmax != xmin && TransactionIdFollows(xmax, *latestRemovedXid)) --- 7872,7879 ---- * This needs to work on both master and standby, where it is used to * assess btree delete records. 
*/ ! if (HeapTupleHeaderXminCommitted(tuple->t_data) || ! (!HeapTupleHeaderXminInvalid(tuple->t_data) && TransactionIdDidCommit(xmin))) { if (xmax != xmin && TransactionIdFollows(xmax, *latestRemovedXid)) *************** log_heap_update(Relation reln, Buffer ol *** 7634,7646 **** /* Prepare WAL data for the old page */ xlrec.old_offnum = ItemPointerGetOffsetNumber(&oldtup->t_self); ! xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data); xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask, oldtup->t_data->t_infomask2); /* Prepare WAL data for the new page */ xlrec.new_offnum = ItemPointerGetOffsetNumber(&newtup->t_self); ! xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data); bufflags = REGBUF_STANDARD; if (init) --- 8157,8169 ---- /* Prepare WAL data for the old page */ xlrec.old_offnum = ItemPointerGetOffsetNumber(&oldtup->t_self); ! xlrec.old_xmax = HeapTupleGetRawXmax(oldtup); xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask, oldtup->t_data->t_infomask2); /* Prepare WAL data for the new page */ xlrec.new_offnum = ItemPointerGetOffsetNumber(&newtup->t_self); ! xlrec.new_xmax = HeapTupleGetRawXmax(newtup); bufflags = REGBUF_STANDARD; if (init) *************** log_heap_update(Relation reln, Buffer ol *** 7652,7657 **** --- 8175,8184 ---- if (oldbuf != newbuf) XLogRegisterBuffer(1, oldbuf, REGBUF_STANDARD); + if (info & XLOG_HEAP_INIT_PAGE) + { + XLogRegisterData((char *) &HeapPageGetSpecial(page)->pd_xid_base, sizeof(TransactionId)); + } XLogRegisterData((char *) &xlrec, SizeOfHeapUpdate); /* *************** heap_xlog_clean(XLogReaderState *record) *** 8006,8012 **** heap_page_prune_execute(buffer, redirected, nredirected, nowdead, ndead, ! nowunused, nunused); freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ --- 8533,8540 ---- heap_page_prune_execute(buffer, redirected, nredirected, nowdead, ndead, ! nowunused, nunused, ! 
true); freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ *************** heap_xlog_freeze_page(XLogReaderState *r *** 8197,8203 **** lp = PageGetItemId(page, xlrec_tp->offset); /* offsets are one-based */ tuple = (HeapTupleHeader) PageGetItem(page, lp); ! heap_execute_freeze_tuple(tuple, xlrec_tp); } PageSetLSN(page, lsn); --- 8725,8731 ---- lp = PageGetItemId(page, xlrec_tp->offset); /* offsets are one-based */ tuple = (HeapTupleHeader) PageGetItem(page, lp); ! heap_execute_freeze_tuple_page(page, tuple, xlrec_tp); } PageSetLSN(page, lsn); *************** heap_xlog_delete(XLogReaderState *record *** 8284,8292 **** fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, &htup->t_infomask2); if (!(xlrec->flags & XLH_DELETE_IS_SUPER)) ! HeapTupleHeaderSetXmax(htup, xlrec->xmax); else ! HeapTupleHeaderSetXmin(htup, InvalidTransactionId); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Mark the page as a candidate for pruning */ --- 8812,8820 ---- fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, &htup->t_infomask2); if (!(xlrec->flags & XLH_DELETE_IS_SUPER)) ! HeapTupleHeaderSetXmax(page, htup, xlrec->xmax); else ! HeapTupleHeaderSetXmin(page, htup, InvalidTransactionId); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Mark the page as a candidate for pruning */ *************** static void *** 8308,8314 **** heap_xlog_insert(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; ! xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record); Buffer buffer; Page page; union --- 8836,8842 ---- heap_xlog_insert(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; ! 
xl_heap_insert *xlrec; Buffer buffer; Page page; union *************** heap_xlog_insert(XLogReaderState *record *** 8324,8329 **** --- 8852,8869 ---- BlockNumber blkno; ItemPointerData target_tid; XLogRedoAction action; + bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; + TransactionId pd_xid_base = InvalidTransactionId; + Pointer rec_data; + + rec_data = (Pointer) XLogRecGetData(record); + if (isinit) + { + pd_xid_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + } + + xlrec = (xl_heap_insert *) rec_data; XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno); ItemPointerSetBlockNumber(&target_tid, blkno); *************** heap_xlog_insert(XLogReaderState *record *** 8348,8358 **** * If we inserted the first and only tuple on the page, re-initialize the * page from scratch. */ ! if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; action = BLK_NEEDS_REDO; } --- 8888,8899 ---- * If we inserted the first and only tuple on the page, re-initialize the * page from scratch. */ ! if (isinit) { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = pd_xid_base; HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; action = BLK_NEEDS_REDO; } *************** heap_xlog_insert(XLogReaderState *record *** 8385,8391 **** htup->t_infomask2 = xlhdr.t_infomask2; htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; ! HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); HeapTupleHeaderSetCmin(htup, FirstCommandId); htup->t_ctid = target_tid; --- 8926,8932 ---- htup->t_infomask2 = xlhdr.t_infomask2; htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; ! 
HeapTupleHeaderSetXmin(page, htup, XLogRecGetXid(record)); HeapTupleHeaderSetCmin(htup, FirstCommandId); htup->t_ctid = target_tid; *************** heap_xlog_multi_insert(XLogReaderState * *** 8441,8452 **** int i; bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; XLogRedoAction action; /* * Insertion doesn't overwrite MVCC data, so no conflict processing is * required. */ ! xlrec = (xl_heap_multi_insert *) XLogRecGetData(record); XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno); --- 8982,9001 ---- int i; bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; XLogRedoAction action; + TransactionId pd_xid_base = InvalidTransactionId; + Pointer rec_data; /* * Insertion doesn't overwrite MVCC data, so no conflict processing is * required. */ ! rec_data = (Pointer) XLogRecGetData(record); ! if (isinit) ! { ! pd_xid_base = *((TransactionId *) rec_data); ! rec_data += sizeof(TransactionId); ! } ! xlrec = (xl_heap_multi_insert *) rec_data; XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno); *************** heap_xlog_multi_insert(XLogReaderState * *** 8470,8475 **** --- 9019,9025 ---- buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = pd_xid_base; HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; action = BLK_NEEDS_REDO; } *************** heap_xlog_multi_insert(XLogReaderState * *** 8521,8527 **** htup->t_infomask2 = xlhdr->t_infomask2; htup->t_infomask = xlhdr->t_infomask; htup->t_hoff = xlhdr->t_hoff; ! HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); HeapTupleHeaderSetCmin(htup, FirstCommandId); ItemPointerSetBlockNumber(&htup->t_ctid, blkno); ItemPointerSetOffsetNumber(&htup->t_ctid, offnum); --- 9071,9077 ---- htup->t_infomask2 = xlhdr->t_infomask2; htup->t_infomask = xlhdr->t_infomask; htup->t_hoff = xlhdr->t_hoff; ! 
HeapTupleHeaderSetXmin(page, htup, XLogRecGetXid(record)); HeapTupleHeaderSetCmin(htup, FirstCommandId); ItemPointerSetBlockNumber(&htup->t_ctid, blkno); ItemPointerSetOffsetNumber(&htup->t_ctid, offnum); *************** static void *** 8565,8571 **** heap_xlog_update(XLogReaderState *record, bool hot_update) { XLogRecPtr lsn = record->EndRecPtr; ! xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record); RelFileNode rnode; BlockNumber oldblk; BlockNumber newblk; --- 9115,9121 ---- heap_xlog_update(XLogReaderState *record, bool hot_update) { XLogRecPtr lsn = record->EndRecPtr; ! xl_heap_update *xlrec; RelFileNode rnode; BlockNumber oldblk; BlockNumber newblk; *************** heap_xlog_update(XLogReaderState *record *** 8590,8595 **** --- 9140,9157 ---- Size freespace = 0; XLogRedoAction oldaction; XLogRedoAction newaction; + bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; + TransactionId pd_xid_base = InvalidTransactionId; + Pointer rec_data; + + rec_data = (Pointer) XLogRecGetData(record); + if (isinit) + { + pd_xid_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + } + + xlrec = (xl_heap_update *) rec_data; /* initialize to keep the compiler quiet */ oldtup.t_data = NULL; *************** heap_xlog_update(XLogReaderState *record *** 8648,8653 **** --- 9210,9216 ---- oldtup.t_data = htup; oldtup.t_len = ItemIdGetLength(lp); + HeapTupleCopyBaseFromPage(&oldtup, page); htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); htup->t_infomask2 &= ~HEAP_KEYS_UPDATED; *************** heap_xlog_update(XLogReaderState *record *** 8657,8663 **** HeapTupleHeaderClearHotUpdated(htup); fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask, &htup->t_infomask2); ! 
HeapTupleHeaderSetXmax(htup, xlrec->old_xmax); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Set forward chain link in t_ctid */ htup->t_ctid = newtid; --- 9220,9226 ---- HeapTupleHeaderClearHotUpdated(htup); fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask, &htup->t_infomask2); ! HeapTupleHeaderSetXmax(page, htup, xlrec->old_xmax); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Set forward chain link in t_ctid */ htup->t_ctid = newtid; *************** heap_xlog_update(XLogReaderState *record *** 8680,8690 **** nbuffer = obuffer; newaction = oldaction; } ! else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) { nbuffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(nbuffer); PageInit(page, BufferGetPageSize(nbuffer), sizeof(HeapPageSpecialData)); HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; newaction = BLK_NEEDS_REDO; } --- 9243,9254 ---- nbuffer = obuffer; newaction = oldaction; } ! else if (isinit) { nbuffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(nbuffer); PageInit(page, BufferGetPageSize(nbuffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = pd_xid_base; HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; newaction = BLK_NEEDS_REDO; } *************** heap_xlog_update(XLogReaderState *record *** 8791,8799 **** htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; ! HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); HeapTupleHeaderSetCmin(htup, FirstCommandId); ! HeapTupleHeaderSetXmax(htup, xlrec->new_xmax); /* Make sure there is no forward chain link in t_ctid */ htup->t_ctid = newtid; --- 9355,9363 ---- htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; ! HeapTupleHeaderSetXmin(page, htup, XLogRecGetXid(record)); HeapTupleHeaderSetCmin(htup, FirstCommandId); ! 
HeapTupleHeaderSetXmax(page, htup, xlrec->new_xmax); /* Make sure there is no forward chain link in t_ctid */ htup->t_ctid = newtid; *************** heap_xlog_lock(XLogReaderState *record) *** 8932,8938 **** BufferGetBlockNumber(buffer), offnum); } ! HeapTupleHeaderSetXmax(htup, xlrec->locking_xid); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); PageSetLSN(page, lsn); MarkBufferDirty(buffer); --- 9496,9502 ---- BufferGetBlockNumber(buffer), offnum); } ! HeapTupleHeaderSetXmax(page, htup, xlrec->locking_xid); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); PageSetLSN(page, lsn); MarkBufferDirty(buffer); *************** heap_xlog_lock_updated(XLogReaderState * *** 8992,8998 **** htup->t_infomask2 &= ~HEAP_KEYS_UPDATED; fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, &htup->t_infomask2); ! HeapTupleHeaderSetXmax(htup, xlrec->xmax); PageSetLSN(page, lsn); MarkBufferDirty(buffer); --- 9556,9562 ---- htup->t_infomask2 &= ~HEAP_KEYS_UPDATED; fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, &htup->t_infomask2); ! 
HeapTupleHeaderSetXmax(page, htup, xlrec->xmax); PageSetLSN(page, lsn); MarkBufferDirty(buffer); *************** heap_xlog_inplace(XLogReaderState *recor *** 9042,9047 **** --- 9606,9636 ---- UnlockReleaseBuffer(buffer); } + static void + heap_xlog_base_shift(XLogReaderState *record) + { + XLogRecPtr lsn = record->EndRecPtr; + xl_heap_base_shift *xlrec = (xl_heap_base_shift *) XLogRecGetData(record); + Buffer buffer; + Page page; + BlockNumber blkno; + RelFileNode target_node; + + XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno); + + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) + { + page = BufferGetPage(buffer); + heap_page_shift_base(InvalidBuffer, page, xlrec->multi, xlrec->delta); + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); + } + + void heap_redo(XLogReaderState *record) { *************** heap_redo(XLogReaderState *record) *** 9063,9068 **** --- 9652,9660 ---- case XLOG_HEAP_UPDATE: heap_xlog_update(record, false); break; + case XLOG_HEAP_BASE_SHIFT: + heap_xlog_base_shift(record); + break; case XLOG_HEAP_HOT_UPDATE: heap_xlog_update(record, true); break; *************** heap_mask(char *pagedata, BlockNumber bl *** 9174,9180 **** mask_page_hint_bits(page); /* Ignore prune_xid (it's like a hint-bit) */ ! HeapPageGetSpecial(page)->pd_prune_xid = InvalidTransactionId; mask_unused_space(page); --- 9766,9772 ---- mask_page_hint_bits(page); /* Ignore prune_xid (it's like a hint-bit) */ ! HeapPageSetPruneXid(page, InvalidTransactionId); mask_unused_space(page); diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c new file mode 100644 index 8d8b5c1..82b3811 *** a/src/backend/access/heap/hio.c --- b/src/backend/access/heap/hio.c *************** void *** 36,42 **** RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, ! 
bool token) { Page pageHeader; OffsetNumber offnum; --- 36,43 ---- RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple, ! bool token, ! TransactionId xid) { Page pageHeader; OffsetNumber offnum; *************** RelationPutHeapTuple(Relation relation, *** 50,55 **** --- 51,59 ---- /* Add the tuple to the page */ pageHeader = BufferGetPage(buffer); + /* FIXME */ + tuple->t_data->t_choice.t_heap.t_xmin = NormalTransactionIdToShort(HeapPageGetSpecial(pageHeader)->pd_xid_base, xid); + offnum = PageAddItem(pageHeader, (Item) tuple->t_data, tuple->t_len, InvalidOffsetNumber, false, true); *************** RelationPutHeapTuple(Relation relation, *** 68,75 **** { ItemId itemId = PageGetItemId(pageHeader, offnum); Item item = PageGetItem(pageHeader, itemId); ! ((HeapTupleHeader) item)->t_ctid = tuple->t_self; } } --- 72,80 ---- { ItemId itemId = PageGetItemId(pageHeader, offnum); Item item = PageGetItem(pageHeader, itemId); + HeapTupleHeader tup = (HeapTupleHeader) item; ! tup->t_ctid = tuple->t_self; } } *************** RelationAddExtraBlocks(Relation relation *** 207,212 **** --- 212,218 ---- LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); page = BufferGetPage(buffer); PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; MarkBufferDirty(buffer); blockNum = BufferGetBlockNumber(buffer); *************** loop: *** 592,597 **** --- 598,604 ---- RelationGetRelationName(relation)); PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; if (len > PageGetHeapFreeSpace(page)) diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c new file mode 100644 index b526fc3..6460d17 *** a/src/backend/access/heap/pruneheap.c --- 
b/src/backend/access/heap/pruneheap.c *************** heap_page_prune_opt(Relation relation, B *** 152,158 **** * needed */ /* OK to prune */ ! (void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore); } /* And release buffer lock */ --- 152,158 ---- * needed */ /* OK to prune */ ! (void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore, true); } /* And release buffer lock */ *************** heap_page_prune_opt(Relation relation, B *** 179,185 **** */ int heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, ! bool report_stats, TransactionId *latestRemovedXid) { int ndeleted = 0; Page page = BufferGetPage(buffer); --- 179,185 ---- */ int heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, ! bool report_stats, TransactionId *latestRemovedXid, bool repairFragmentation) { int ndeleted = 0; Page page = BufferGetPage(buffer); *************** heap_page_prune(Relation relation, Buffe *** 239,251 **** heap_page_prune_execute(buffer, prstate.redirected, prstate.nredirected, prstate.nowdead, prstate.ndead, ! prstate.nowunused, prstate.nunused); /* * Update the page's pd_prune_xid field to either zero, or the lowest * XID of any soon-prunable tuple. */ ! HeapPageGetSpecial(page)->pd_prune_xid = prstate.new_prune_xid; /* * Also clear the "page is full" flag, since there's no point in --- 239,252 ---- heap_page_prune_execute(buffer, prstate.redirected, prstate.nredirected, prstate.nowdead, prstate.ndead, ! prstate.nowunused, prstate.nunused, ! repairFragmentation); /* * Update the page's pd_prune_xid field to either zero, or the lowest * XID of any soon-prunable tuple. */ ! HeapPageSetPruneXid(page, prstate.new_prune_xid); /* * Also clear the "page is full" flag, since there's no point in *************** heap_page_prune(Relation relation, Buffe *** 283,292 **** * point in repeating the prune/defrag process until something else * happens to the page. */ ! 
if (HeapPageGetSpecial(page)->pd_prune_xid != prstate.new_prune_xid || PageIsFull(page)) { ! HeapPageGetSpecial(page)->pd_prune_xid = prstate.new_prune_xid; PageClearFull(page); MarkBufferDirtyHint(buffer, true); } --- 284,293 ---- * point in repeating the prune/defrag process until something else * happens to the page. */ ! if (HeapPageGetPruneXid(page) != prstate.new_prune_xid || PageIsFull(page)) { ! HeapPageSetPruneXid(page, prstate.new_prune_xid); PageClearFull(page); MarkBufferDirtyHint(buffer, true); } *************** heap_prune_chain(Relation relation, Buff *** 368,373 **** --- 369,375 ---- HeapTupleData tup; tup.t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(&tup, dp); rootlp = PageGetItemId(dp, rootoffnum); *************** heap_prune_chain(Relation relation, Buff *** 406,412 **** == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup)) { heap_prune_record_unused(prstate, rootoffnum); ! HeapTupleHeaderAdvanceLatestRemovedXid(htup, &prstate->latestRemovedXid); ndeleted++; } --- 408,414 ---- == HEAPTUPLE_DEAD && !HeapTupleHeaderIsHotUpdated(htup)) { heap_prune_record_unused(prstate, rootoffnum); ! HeapTupleHeaderAdvanceLatestRemovedXid(&tup, &prstate->latestRemovedXid); ndeleted++; } *************** heap_prune_chain(Relation relation, Buff *** 467,479 **** tup.t_data = htup; tup.t_len = ItemIdGetLength(lp); ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum); /* * Check the tuple XMIN against prior XMAX, if any */ if (TransactionIdIsValid(priorXmax) && ! !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax)) break; /* --- 469,482 ---- tup.t_data = htup; tup.t_len = ItemIdGetLength(lp); + HeapTupleCopyBaseFromPage(&tup, dp); ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum); /* * Check the tuple XMIN against prior XMAX, if any */ if (TransactionIdIsValid(priorXmax) && ! 
!TransactionIdEquals(HeapTupleGetXmin(&tup), priorXmax)) break; /* *************** heap_prune_chain(Relation relation, Buff *** 500,506 **** * that the page is reconsidered for pruning in future. */ heap_prune_record_prunable(prstate, ! HeapTupleHeaderGetUpdateXid(htup)); break; case HEAPTUPLE_DELETE_IN_PROGRESS: --- 503,509 ---- * that the page is reconsidered for pruning in future. */ heap_prune_record_prunable(prstate, ! HeapTupleGetUpdateXidAny(&tup)); break; case HEAPTUPLE_DELETE_IN_PROGRESS: *************** heap_prune_chain(Relation relation, Buff *** 510,516 **** * that the page is reconsidered for pruning in future. */ heap_prune_record_prunable(prstate, ! HeapTupleHeaderGetUpdateXid(htup)); break; case HEAPTUPLE_LIVE: --- 513,519 ---- * that the page is reconsidered for pruning in future. */ heap_prune_record_prunable(prstate, ! HeapTupleGetUpdateXidAny(&tup)); break; case HEAPTUPLE_LIVE: *************** heap_prune_chain(Relation relation, Buff *** 539,545 **** if (tupdead) { latestdead = offnum; ! HeapTupleHeaderAdvanceLatestRemovedXid(htup, &prstate->latestRemovedXid); } else if (!recent_dead) --- 542,548 ---- if (tupdead) { latestdead = offnum; ! HeapTupleHeaderAdvanceLatestRemovedXid(&tup, &prstate->latestRemovedXid); } else if (!recent_dead) *************** heap_prune_chain(Relation relation, Buff *** 558,564 **** Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == BufferGetBlockNumber(buffer)); offnum = ItemPointerGetOffsetNumber(&htup->t_ctid); ! priorXmax = HeapTupleHeaderGetUpdateXid(htup); } /* --- 561,567 ---- Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == BufferGetBlockNumber(buffer)); offnum = ItemPointerGetOffsetNumber(&htup->t_ctid); ! priorXmax = HeapTupleGetUpdateXidAny(&tup); } /* *************** void *** 679,685 **** heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, ! 
OffsetNumber *nowunused, int nunused) { Page page = (Page) BufferGetPage(buffer); OffsetNumber *offnum; --- 682,689 ---- heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, ! OffsetNumber *nowunused, int nunused, ! bool repairFragmentation) { Page page = (Page) BufferGetPage(buffer); OffsetNumber *offnum; *************** heap_page_prune_execute(Buffer buffer, *** 720,726 **** * Finally, repair any fragmentation, and update the page's hint bit about * whether it has free pointers. */ ! PageRepairFragmentation(page); } --- 724,731 ---- * Finally, repair any fragmentation, and update the page's hint bit about * whether it has free pointers. */ ! if (repairFragmentation) ! PageRepairFragmentation(page); } *************** heap_get_root_tuples(Page page, OffsetNu *** 754,759 **** --- 759,765 ---- HeapTupleHeader htup; OffsetNumber nextoffnum; TransactionId priorXmax; + HeapTupleData tup; /* skip unused and dead items */ if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp)) *************** heap_get_root_tuples(Page page, OffsetNu *** 762,767 **** --- 768,775 ---- if (ItemIdIsNormal(lp)) { htup = (HeapTupleHeader) PageGetItem(page, lp); + tup.t_data = htup; + HeapTupleCopyBaseFromPage(&tup, page); /* * Check if this tuple is part of a HOT-chain rooted at some other *************** heap_get_root_tuples(Page page, OffsetNu *** 783,789 **** /* Set up to scan the HOT-chain */ nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid); ! priorXmax = HeapTupleHeaderGetUpdateXid(htup); } else { --- 791,797 ---- /* Set up to scan the HOT-chain */ nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid); ! priorXmax = HeapTupleGetUpdateXidAny(&tup); } else { *************** heap_get_root_tuples(Page page, OffsetNu *** 811,819 **** break; htup = (HeapTupleHeader) PageGetItem(page, lp); if (TransactionIdIsValid(priorXmax) && ! 
!TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup))) break; /* Remember the root line pointer for this item */ --- 819,829 ---- break; htup = (HeapTupleHeader) PageGetItem(page, lp); + tup.t_data = htup; + HeapTupleCopyBaseFromPage(&tup, page); if (TransactionIdIsValid(priorXmax) && ! !TransactionIdEquals(priorXmax, HeapTupleGetXmin(&tup))) break; /* Remember the root line pointer for this item */ *************** heap_get_root_tuples(Page page, OffsetNu *** 824,830 **** break; nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid); ! priorXmax = HeapTupleHeaderGetUpdateXid(htup); } } } --- 834,840 ---- break; nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid); ! priorXmax = HeapTupleGetUpdateXidAny(&tup); } } } diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c new file mode 100644 index 0a7b799..c6ce437 *** a/src/backend/access/heap/rewriteheap.c --- b/src/backend/access/heap/rewriteheap.c *************** rewrite_heap_tuple(RewriteState state, *** 398,403 **** --- 398,404 ---- &old_tuple->t_data->t_choice.t_heap, sizeof(HeapTupleFields)); + HeapTupleCopyBase(new_tuple, old_tuple); new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK; new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK; new_tuple->t_data->t_infomask |= *************** rewrite_heap_tuple(RewriteState state, *** 407,413 **** * While we have our hands on the tuple, we may as well freeze any * eligible xmin or xmax, so that future VACUUM effort can be saved. */ ! heap_freeze_tuple(new_tuple->t_data, state->rs_freeze_xid, state->rs_cutoff_multi); /* --- 408,414 ---- * While we have our hands on the tuple, we may as well freeze any * eligible xmin or xmax, so that future VACUUM effort can be saved. */ ! heap_freeze_tuple(new_tuple, state->rs_freeze_xid, state->rs_cutoff_multi); /* *************** rewrite_heap_tuple(RewriteState state, *** 420,433 **** * If the tuple has been updated, check the old-to-new mapping hash table. 
*/ if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) || ! HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) && !(ItemPointerEquals(&(old_tuple->t_self), &(old_tuple->t_data->t_ctid)))) { OldToNewMapping mapping; memset(&hashkey, 0, sizeof(hashkey)); ! hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data); hashkey.tid = old_tuple->t_data->t_ctid; mapping = (OldToNewMapping) --- 421,434 ---- * If the tuple has been updated, check the old-to-new mapping hash table. */ if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) || ! HeapTupleHeaderIsOnlyLocked(old_tuple)) && !(ItemPointerEquals(&(old_tuple->t_self), &(old_tuple->t_data->t_ctid)))) { OldToNewMapping mapping; memset(&hashkey, 0, sizeof(hashkey)); ! hashkey.xmin = HeapTupleGetUpdateXidAny(old_tuple); hashkey.tid = old_tuple->t_data->t_ctid; mapping = (OldToNewMapping) *************** rewrite_heap_tuple(RewriteState state, *** 500,506 **** * RECENTLY_DEAD if and only if the xmin is not before OldestXmin. */ if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) && ! !TransactionIdPrecedes(HeapTupleHeaderGetXmin(new_tuple->t_data), state->rs_oldest_xmin)) { /* --- 501,507 ---- * RECENTLY_DEAD if and only if the xmin is not before OldestXmin. */ if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) && ! !TransactionIdPrecedes(HeapTupleGetXmin(new_tuple), state->rs_oldest_xmin)) { /* *************** rewrite_heap_tuple(RewriteState state, *** 509,515 **** UnresolvedTup unresolved; memset(&hashkey, 0, sizeof(hashkey)); ! hashkey.xmin = HeapTupleHeaderGetXmin(new_tuple->t_data); hashkey.tid = old_tid; unresolved = hash_search(state->rs_unresolved_tups, &hashkey, --- 510,516 ---- UnresolvedTup unresolved; memset(&hashkey, 0, sizeof(hashkey)); ! hashkey.xmin = HeapTupleGetXmin(new_tuple); hashkey.tid = old_tid; unresolved = hash_search(state->rs_unresolved_tups, &hashkey, *************** rewrite_heap_dead_tuple(RewriteState sta *** 597,603 **** bool found; memset(&hashkey, 0, sizeof(hashkey)); ! 
hashkey.xmin = HeapTupleHeaderGetXmin(old_tuple->t_data); hashkey.tid = old_tuple->t_self; unresolved = hash_search(state->rs_unresolved_tups, &hashkey, --- 598,604 ---- bool found; memset(&hashkey, 0, sizeof(hashkey)); ! hashkey.xmin = HeapTupleGetXmin(old_tuple); hashkey.tid = old_tuple->t_self; unresolved = hash_search(state->rs_unresolved_tups, &hashkey, *************** raw_heap_insert(RewriteState state, Heap *** 633,638 **** --- 634,640 ---- Size len; OffsetNumber newoff; HeapTuple heaptup; + TransactionId xmin, xmax; /* * If the new tuple is too big for storage or contains already toasted *************** raw_heap_insert(RewriteState state, Heap *** 709,718 **** --- 711,732 ---- { /* Initialize a new empty page */ PageInit(page, BLCKSZ, sizeof(HeapPageSpecialData)); + /* FIXME */ HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; state->rs_buffer_valid = true; } + /* FIXME */ + rewrite_page_prepare_for_xid(page, HeapTupleGetXmin(heaptup), false); + rewrite_page_prepare_for_xid(page, HeapTupleGetRawXmax(heaptup), + (heaptup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ? true : false); + + xmin = HeapTupleGetXmin(heaptup); + xmax = HeapTupleGetRawXmax(heaptup); + HeapTupleCopyBaseFromPage(heaptup, page); + HeapTupleSetXmin(heaptup, xmin); + HeapTupleSetXmax(heaptup, xmax); + /* And now we can insert the tuple into the page */ newoff = PageAddItem(page, (Item) heaptup->t_data, heaptup->t_len, InvalidOffsetNumber, false, true); *************** logical_rewrite_log_mapping(RewriteState *** 1007,1013 **** dboid, relid, (uint32) (state->rs_begin_lsn >> 32), (uint32) state->rs_begin_lsn, ! xid, GetCurrentTransactionId()); dlist_init(&src->mappings); src->num_mappings = 0; --- 1021,1030 ---- dboid, relid, (uint32) (state->rs_begin_lsn >> 32), (uint32) state->rs_begin_lsn, ! (uint32) (xid >> 32), ! (uint32) xid, ! (uint32) (GetCurrentTransactionId() >> 32), ! 
(uint32) GetCurrentTransactionId()); dlist_init(&src->mappings); src->num_mappings = 0; *************** logical_rewrite_heap_tuple(RewriteState *** 1057,1065 **** if (!state->rs_logical_rewrite) return; ! xmin = HeapTupleHeaderGetXmin(new_tuple->t_data); /* use *GetUpdateXid to correctly deal with multixacts */ ! xmax = HeapTupleHeaderGetUpdateXid(new_tuple->t_data); /* * Log the mapping iff the tuple has been created recently. --- 1074,1082 ---- if (!state->rs_logical_rewrite) return; ! xmin = HeapTupleGetXmin(new_tuple); /* use *GetUpdateXid to correctly deal with multixacts */ ! xmax = HeapTupleGetUpdateXidAny(new_tuple); /* * Log the mapping iff the tuple has been created recently. *************** heap_xlog_logical_rewrite(XLogReaderStat *** 1131,1137 **** xlrec->mapped_db, xlrec->mapped_rel, (uint32) (xlrec->start_lsn >> 32), (uint32) xlrec->start_lsn, ! xlrec->mapped_xid, XLogRecGetXid(r)); fd = OpenTransientFile(path, O_CREAT | O_WRONLY | PG_BINARY, --- 1148,1157 ---- xlrec->mapped_db, xlrec->mapped_rel, (uint32) (xlrec->start_lsn >> 32), (uint32) xlrec->start_lsn, ! (uint32) (xlrec->mapped_xid >> 32), ! (uint32) xlrec->mapped_xid, ! (uint32) (XLogRecGetXid(r) >> 32), ! (uint32) XLogRecGetXid(r)); fd = OpenTransientFile(path, O_CREAT | O_WRONLY | PG_BINARY, *************** CheckPointLogicalRewriteHeap(void) *** 1226,1235 **** Oid dboid; Oid relid; XLogRecPtr lsn; ! TransactionId rewrite_xid; ! TransactionId create_xid; ! uint32 hi, ! lo; if (strcmp(mapping_de->d_name, ".") == 0 || strcmp(mapping_de->d_name, "..") == 0) --- 1246,1257 ---- Oid dboid; Oid relid; XLogRecPtr lsn; ! uint32 lsn_hi, ! lsn_lo, ! rewrite_xid_hi, ! rewrite_xid_lo, ! create_xid_hi, ! create_xid_lo; if (strcmp(mapping_de->d_name, ".") == 0 || strcmp(mapping_de->d_name, "..") == 0) *************** CheckPointLogicalRewriteHeap(void) *** 1244,1253 **** continue; if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, ! 
&dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6) elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name); ! lsn = ((uint64) hi) << 32 | lo; if (lsn < cutoff || cutoff == InvalidXLogRecPtr) { --- 1266,1277 ---- continue; if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, ! &dboid, &relid, &lsn_hi, &lsn_lo, ! &rewrite_xid_hi, &rewrite_xid_lo, ! &create_xid_hi, &create_xid_lo) != 8) elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name); ! lsn = ((uint64) lsn_hi) << 32 | lsn_lo; if (lsn < cutoff || cutoff == InvalidXLogRecPtr) { diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c new file mode 100644 index 5a8f1da..6a92da7 *** a/src/backend/access/heap/tuptoaster.c --- b/src/backend/access/heap/tuptoaster.c *************** toast_insert_or_update(Relation rel, Hea *** 1027,1032 **** --- 1027,1033 ---- result_tuple->t_len = new_tuple_len; result_tuple->t_self = newtup->t_self; result_tuple->t_tableOid = newtup->t_tableOid; + HeapTupleCopyBase(result_tuple, newtup); new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE); result_tuple->t_data = new_data; *************** toast_flatten_tuple(HeapTuple tup, Tuple *** 1133,1138 **** --- 1134,1140 ---- new_tuple->t_self = tup->t_self; new_tuple->t_tableOid = tup->t_tableOid; + HeapTupleCopyBase(new_tuple, tup); new_tuple->t_data->t_choice = tup->t_data->t_choice; new_tuple->t_data->t_ctid = tup->t_data->t_ctid; *************** toast_flatten_tuple_to_datum(HeapTupleHe *** 1205,1210 **** --- 1207,1213 ---- ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; tmptup.t_data = tup; + HeapTupleSetZeroBase(&tmptup); /* * Break down the tuple into fields. 
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c new file mode 100644 index 3610c7c..32e6431 *** a/src/backend/access/nbtree/nbtxlog.c --- b/src/backend/access/nbtree/nbtxlog.c *************** *** 16,21 **** --- 16,22 ---- #include "access/bufmask.h" #include "access/heapam_xlog.h" + #include "access/htup_details.h" #include "access/nbtree.h" #include "access/nbtxlog.h" #include "access/transam.h" *************** btree_xlog_delete_get_latestRemovedXid(X *** 628,636 **** */ if (ItemIdHasStorage(hitemid)) { htuphdr = (HeapTupleHeader) PageGetItem(hpage, hitemid); ! HeapTupleHeaderAdvanceLatestRemovedXid(htuphdr, &latestRemovedXid); } else if (ItemIdIsDead(hitemid)) { --- 629,641 ---- */ if (ItemIdHasStorage(hitemid)) { + HeapTupleData htup; + htuphdr = (HeapTupleHeader) PageGetItem(hpage, hitemid); + htup.t_data = htuphdr; + HeapTupleCopyBaseFromPage(&htup, hpage); ! HeapTupleHeaderAdvanceLatestRemovedXid(&htup, &latestRemovedXid); } else if (ItemIdIsDead(hitemid)) { diff --git a/src/backend/access/rmgrdesc/clogdesc.c b/src/backend/access/rmgrdesc/clogdesc.c new file mode 100644 index 9181154..844c83d *** a/src/backend/access/rmgrdesc/clogdesc.c --- b/src/backend/access/rmgrdesc/clogdesc.c *************** clog_desc(StringInfo buf, XLogReaderStat *** 25,41 **** if (info == CLOG_ZEROPAGE) { ! int pageno; ! memcpy(&pageno, rec, sizeof(int)); ! appendStringInfo(buf, "page %d", pageno); } else if (info == CLOG_TRUNCATE) { xl_clog_truncate xlrec; memcpy(&xlrec, rec, sizeof(xl_clog_truncate)); ! appendStringInfo(buf, "page %d; oldestXact %u", xlrec.pageno, xlrec.oldestXact); } } --- 25,41 ---- if (info == CLOG_ZEROPAGE) { ! int64 pageno; ! memcpy(&pageno, rec, sizeof(int64)); ! appendStringInfo(buf, "page " INT64_FORMAT, pageno); } else if (info == CLOG_TRUNCATE) { xl_clog_truncate xlrec; memcpy(&xlrec, rec, sizeof(xl_clog_truncate)); ! 
appendStringInfo(buf, "page " INT64_FORMAT "; oldestXact " XID_FMT, xlrec.pageno, xlrec.oldestXact); } } diff --git a/src/backend/access/rmgrdesc/committsdesc.c b/src/backend/access/rmgrdesc/committsdesc.c new file mode 100644 index 3e670bd..be6e167 *** a/src/backend/access/rmgrdesc/committsdesc.c --- b/src/backend/access/rmgrdesc/committsdesc.c *************** commit_ts_desc(StringInfo buf, XLogReade *** 26,41 **** if (info == COMMIT_TS_ZEROPAGE) { ! int pageno; ! memcpy(&pageno, rec, sizeof(int)); ! appendStringInfo(buf, "%d", pageno); } else if (info == COMMIT_TS_TRUNCATE) { xl_commit_ts_truncate *trunc = (xl_commit_ts_truncate *) rec; ! appendStringInfo(buf, "pageno %d, oldestXid %u", trunc->pageno, trunc->oldestXid); } else if (info == COMMIT_TS_SETTS) --- 26,41 ---- if (info == COMMIT_TS_ZEROPAGE) { ! int64 pageno; ! memcpy(&pageno, rec, sizeof(int64)); ! appendStringInfo(buf, INT64_FORMAT, pageno); } else if (info == COMMIT_TS_TRUNCATE) { xl_commit_ts_truncate *trunc = (xl_commit_ts_truncate *) rec; ! appendStringInfo(buf, "pageno " INT64_FORMAT ", oldestXid " XID_FMT, trunc->pageno, trunc->oldestXid); } else if (info == COMMIT_TS_SETTS) *************** commit_ts_desc(StringInfo buf, XLogReade *** 43,49 **** xl_commit_ts_set *xlrec = (xl_commit_ts_set *) rec; int nsubxids; ! appendStringInfo(buf, "set %s/%d for: %u", timestamptz_to_str(xlrec->timestamp), xlrec->nodeid, xlrec->mainxid); --- 43,49 ---- xl_commit_ts_set *xlrec = (xl_commit_ts_set *) rec; int nsubxids; ! appendStringInfo(buf, "set %s/%d for: " XID_FMT, timestamptz_to_str(xlrec->timestamp), xlrec->nodeid, xlrec->mainxid); *************** commit_ts_desc(StringInfo buf, XLogReade *** 59,65 **** XLogRecGetData(record) + SizeOfCommitTsSet, sizeof(TransactionId) * nsubxids); for (i = 0; i < nsubxids; i++) ! appendStringInfo(buf, ", %u", subxids[i]); pfree(subxids); } } --- 59,65 ---- XLogRecGetData(record) + SizeOfCommitTsSet, sizeof(TransactionId) * nsubxids); for (i = 0; i < nsubxids; i++) ! 
appendStringInfo(buf, ", " XID_FMT, subxids[i]); pfree(subxids); } } diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c new file mode 100644 index 44d2d63..33cd527 *** a/src/backend/access/rmgrdesc/heapdesc.c --- b/src/backend/access/rmgrdesc/heapdesc.c *************** heap_desc(StringInfo buf, XLogReaderStat *** 55,77 **** { xl_heap_update *xlrec = (xl_heap_update *) rec; ! appendStringInfo(buf, "off %u xmax %u ", xlrec->old_offnum, xlrec->old_xmax); out_infobits(buf, xlrec->old_infobits_set); ! appendStringInfo(buf, "; new off %u xmax %u", xlrec->new_offnum, xlrec->new_xmax); } else if (info == XLOG_HEAP_HOT_UPDATE) { xl_heap_update *xlrec = (xl_heap_update *) rec; ! appendStringInfo(buf, "off %u xmax %u ", xlrec->old_offnum, xlrec->old_xmax); out_infobits(buf, xlrec->old_infobits_set); ! appendStringInfo(buf, "; new off %u xmax %u", xlrec->new_offnum, xlrec->new_xmax); } --- 55,85 ---- { xl_heap_update *xlrec = (xl_heap_update *) rec; ! appendStringInfo(buf, "off %u xmax " XID_FMT " ", xlrec->old_offnum, xlrec->old_xmax); out_infobits(buf, xlrec->old_infobits_set); ! appendStringInfo(buf, "; new off %u xmax " XID_FMT, xlrec->new_offnum, xlrec->new_xmax); } + else if (info == XLOG_HEAP_BASE_SHIFT) + { + xl_heap_base_shift *xlrec = (xl_heap_base_shift *) rec; + + appendStringInfo(buf, "%s delta " INT64_FORMAT " ", + xlrec->multi ? "MultiXactId" : "XactId", + xlrec->delta); + } else if (info == XLOG_HEAP_HOT_UPDATE) { xl_heap_update *xlrec = (xl_heap_update *) rec; ! appendStringInfo(buf, "off %u xmax " XID_FMT " ", xlrec->old_offnum, xlrec->old_xmax); out_infobits(buf, xlrec->old_infobits_set); ! appendStringInfo(buf, "; new off %u xmax " XID_FMT, xlrec->new_offnum, xlrec->new_xmax); } *************** heap_desc(StringInfo buf, XLogReaderStat *** 85,91 **** { xl_heap_lock *xlrec = (xl_heap_lock *) rec; ! 
appendStringInfo(buf, "off %u: xid %u: flags %u ", xlrec->offnum, xlrec->locking_xid, xlrec->flags); out_infobits(buf, xlrec->infobits_set); } --- 93,99 ---- { xl_heap_lock *xlrec = (xl_heap_lock *) rec; ! appendStringInfo(buf, "off %u: xid " XID_FMT ": flags %u ", xlrec->offnum, xlrec->locking_xid, xlrec->flags); out_infobits(buf, xlrec->infobits_set); } *************** heap2_desc(StringInfo buf, XLogReaderSta *** 107,132 **** { xl_heap_clean *xlrec = (xl_heap_clean *) rec; ! appendStringInfo(buf, "remxid %u", xlrec->latestRemovedXid); } else if (info == XLOG_HEAP2_FREEZE_PAGE) { xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) rec; ! appendStringInfo(buf, "cutoff xid %u ntuples %u", xlrec->cutoff_xid, xlrec->ntuples); } else if (info == XLOG_HEAP2_CLEANUP_INFO) { xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) rec; ! appendStringInfo(buf, "remxid %u", xlrec->latestRemovedXid); } else if (info == XLOG_HEAP2_VISIBLE) { xl_heap_visible *xlrec = (xl_heap_visible *) rec; ! appendStringInfo(buf, "cutoff xid %u flags %d", xlrec->cutoff_xid, xlrec->flags); } else if (info == XLOG_HEAP2_MULTI_INSERT) --- 115,140 ---- { xl_heap_clean *xlrec = (xl_heap_clean *) rec; ! appendStringInfo(buf, "remxid " XID_FMT, xlrec->latestRemovedXid); } else if (info == XLOG_HEAP2_FREEZE_PAGE) { xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) rec; ! appendStringInfo(buf, "cutoff xid " XID_FMT " ntuples %u", xlrec->cutoff_xid, xlrec->ntuples); } else if (info == XLOG_HEAP2_CLEANUP_INFO) { xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) rec; ! appendStringInfo(buf, "remxid " XID_FMT, xlrec->latestRemovedXid); } else if (info == XLOG_HEAP2_VISIBLE) { xl_heap_visible *xlrec = (xl_heap_visible *) rec; ! appendStringInfo(buf, "cutoff xid " XID_FMT " flags %d", xlrec->cutoff_xid, xlrec->flags); } else if (info == XLOG_HEAP2_MULTI_INSERT) *************** heap2_desc(StringInfo buf, XLogReaderSta *** 139,145 **** { xl_heap_lock_updated *xlrec = (xl_heap_lock_updated *) rec; ! 
appendStringInfo(buf, "off %u: xmax %u: flags %u ", xlrec->offnum, xlrec->xmax, xlrec->flags); out_infobits(buf, xlrec->infobits_set); } --- 147,153 ---- { xl_heap_lock_updated *xlrec = (xl_heap_lock_updated *) rec; ! appendStringInfo(buf, "off %u: xmax " XID_FMT ": flags %u ", xlrec->offnum, xlrec->xmax, xlrec->flags); out_infobits(buf, xlrec->infobits_set); } *************** heap_identify(uint8 info) *** 180,185 **** --- 188,196 ---- case XLOG_HEAP_UPDATE | XLOG_HEAP_INIT_PAGE: id = "UPDATE+INIT"; break; + case XLOG_HEAP_BASE_SHIFT: + id = "BASE_SHIFT"; + break; case XLOG_HEAP_HOT_UPDATE: id = "HOT_UPDATE"; break; diff --git a/src/backend/access/rmgrdesc/mxactdesc.c b/src/backend/access/rmgrdesc/mxactdesc.c new file mode 100644 index 9c17447..0c78b76 *** a/src/backend/access/rmgrdesc/mxactdesc.c --- b/src/backend/access/rmgrdesc/mxactdesc.c *************** *** 19,25 **** static void out_member(StringInfo buf, MultiXactMember *member) { ! appendStringInfo(buf, "%u ", member->xid); switch (member->status) { case MultiXactStatusForKeyShare: --- 19,25 ---- static void out_member(StringInfo buf, MultiXactMember *member) { ! appendStringInfo(buf, XID_FMT " ", member->xid); switch (member->status) { case MultiXactStatusForKeyShare: *************** multixact_desc(StringInfo buf, XLogReade *** 55,72 **** if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE || info == XLOG_MULTIXACT_ZERO_MEM_PAGE) { ! int pageno; ! memcpy(&pageno, rec, sizeof(int)); ! appendStringInfo(buf, "%d", pageno); } else if (info == XLOG_MULTIXACT_CREATE_ID) { xl_multixact_create *xlrec = (xl_multixact_create *) rec; int i; ! appendStringInfo(buf, "%u offset %u nmembers %d: ", xlrec->mid, ! xlrec->moff, xlrec->nmembers); for (i = 0; i < xlrec->nmembers; i++) out_member(buf, &xlrec->members[i]); } --- 55,72 ---- if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE || info == XLOG_MULTIXACT_ZERO_MEM_PAGE) { ! int64 pageno; ! memcpy(&pageno, rec, sizeof(int64)); !
appendStringInfo(buf, INT64_FORMAT, pageno); } else if (info == XLOG_MULTIXACT_CREATE_ID) { xl_multixact_create *xlrec = (xl_multixact_create *) rec; int i; ! appendStringInfo(buf, XID_FMT " offset " INT64_FORMAT " nmembers %d: ", ! xlrec->mid, xlrec->moff, xlrec->nmembers); for (i = 0; i < xlrec->nmembers; i++) out_member(buf, &xlrec->members[i]); } *************** multixact_desc(StringInfo buf, XLogReade *** 74,80 **** { xl_multixact_truncate *xlrec = (xl_multixact_truncate *) rec; ! appendStringInfo(buf, "offsets [%u, %u), members [%u, %u)", xlrec->startTruncOff, xlrec->endTruncOff, xlrec->startTruncMemb, xlrec->endTruncMemb); } --- 74,80 ---- { xl_multixact_truncate *xlrec = (xl_multixact_truncate *) rec; ! appendStringInfo(buf, "offsets [" XID_FMT ", " XID_FMT "), members [" INT64_FORMAT ", " INT64_FORMAT ")", xlrec->startTruncOff, xlrec->endTruncOff, xlrec->startTruncMemb, xlrec->endTruncMemb); } diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c new file mode 100644 index a3e1331..2347079 *** a/src/backend/access/rmgrdesc/nbtdesc.c --- b/src/backend/access/rmgrdesc/nbtdesc.c *************** btree_desc(StringInfo buf, XLogReaderSta *** 70,76 **** { xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) rec; ! appendStringInfo(buf, "left %u; right %u; btpo_xact %u; ", xlrec->leftsib, xlrec->rightsib, xlrec->btpo_xact); appendStringInfo(buf, "leafleft %u; leafright %u; topparent %u", --- 70,76 ---- { xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) rec; ! appendStringInfo(buf, "left %u; right %u; btpo_xact " XID_FMT "; ", xlrec->leftsib, xlrec->rightsib, xlrec->btpo_xact); appendStringInfo(buf, "leafleft %u; leafright %u; topparent %u", *************** btree_desc(StringInfo buf, XLogReaderSta *** 89,95 **** { xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) rec; ! 
appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %u", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, xlrec->latestRemovedXid); break; --- 89,95 ---- { xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) rec; ! appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid " XID_FMT, xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, xlrec->latestRemovedXid); break; diff --git a/src/backend/access/rmgrdesc/spgdesc.c b/src/backend/access/rmgrdesc/spgdesc.c new file mode 100644 index 41ed84b..13f35b2 *** a/src/backend/access/rmgrdesc/spgdesc.c --- b/src/backend/access/rmgrdesc/spgdesc.c *************** spg_desc(StringInfo buf, XLogReaderState *** 75,81 **** /* no further information */ break; case XLOG_SPGIST_VACUUM_REDIRECT: ! appendStringInfo(buf, "newest XID %u", ((spgxlogVacuumRedirect *) rec)->newestRedirectXid); break; } --- 75,81 ---- /* no further information */ break; case XLOG_SPGIST_VACUUM_REDIRECT: ! appendStringInfo(buf, "newest XID " XID_FMT, ((spgxlogVacuumRedirect *) rec)->newestRedirectXid); break; } diff --git a/src/backend/access/rmgrdesc/standbydesc.c b/src/backend/access/rmgrdesc/standbydesc.c new file mode 100644 index 278546a..38222ed *** a/src/backend/access/rmgrdesc/standbydesc.c --- b/src/backend/access/rmgrdesc/standbydesc.c *************** standby_desc_running_xacts(StringInfo bu *** 21,27 **** { int i; ! appendStringInfo(buf, "nextXid %u latestCompletedXid %u oldestRunningXid %u", xlrec->nextXid, xlrec->latestCompletedXid, xlrec->oldestRunningXid); --- 21,27 ---- { int i; ! appendStringInfo(buf, "nextXid " XID_FMT " latestCompletedXid " XID_FMT " oldestRunningXid " XID_FMT, xlrec->nextXid, xlrec->latestCompletedXid, xlrec->oldestRunningXid); *************** standby_desc_running_xacts(StringInfo bu *** 29,35 **** { appendStringInfo(buf, "; %d xacts:", xlrec->xcnt); for (i = 0; i < xlrec->xcnt; i++) ! 
appendStringInfo(buf, " %u", xlrec->xids[i]); } if (xlrec->subxid_overflow) --- 29,35 ---- { appendStringInfo(buf, "; %d xacts:", xlrec->xcnt); for (i = 0; i < xlrec->xcnt; i++) ! appendStringInfo(buf, " " XID_FMT, xlrec->xids[i]); } if (xlrec->subxid_overflow) *************** standby_desc(StringInfo buf, XLogReaderS *** 48,54 **** int i; for (i = 0; i < xlrec->nlocks; i++) ! appendStringInfo(buf, "xid %u db %u rel %u ", xlrec->locks[i].xid, xlrec->locks[i].dbOid, xlrec->locks[i].relOid); } --- 48,54 ---- int i; for (i = 0; i < xlrec->nlocks; i++) ! appendStringInfo(buf, "xid " XID_FMT " db %u rel %u ", xlrec->locks[i].xid, xlrec->locks[i].dbOid, xlrec->locks[i].relOid); } diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c new file mode 100644 index 3aafa79..977013b *** a/src/backend/access/rmgrdesc/xactdesc.c --- b/src/backend/access/rmgrdesc/xactdesc.c *************** ParseCommitRecord(uint8 info, xl_xact_co *** 73,78 **** --- 73,87 ---- data += parsed->nsubxacts * sizeof(TransactionId); } + if (parsed->xinfo & XACT_XINFO_HAS_TWOPHASE) + { + xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data; + + parsed->twophase_xid = xl_twophase->xid; + + data += sizeof(xl_xact_twophase); + } + if (parsed->xinfo & XACT_XINFO_HAS_RELFILENODES) { xl_xact_relfilenodes *xl_relfilenodes = (xl_xact_relfilenodes *) data; *************** ParseCommitRecord(uint8 info, xl_xact_co *** 95,109 **** data += xl_invals->nmsgs * sizeof(SharedInvalidationMessage); } - if (parsed->xinfo & XACT_XINFO_HAS_TWOPHASE) - { - xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data; - - parsed->twophase_xid = xl_twophase->xid; - - data += sizeof(xl_xact_twophase); - } - if (parsed->xinfo & XACT_XINFO_HAS_ORIGIN) { xl_xact_origin xl_origin; --- 104,109 ---- *************** ParseAbortRecord(uint8 info, xl_xact_abo *** 150,155 **** --- 150,164 ---- data += parsed->nsubxacts * sizeof(TransactionId); } + if (parsed->xinfo & XACT_XINFO_HAS_TWOPHASE) + { + 
xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data; + + parsed->twophase_xid = xl_twophase->xid; + + data += sizeof(xl_xact_twophase); + } + if (parsed->xinfo & XACT_XINFO_HAS_RELFILENODES) { xl_xact_relfilenodes *xl_relfilenodes = (xl_xact_relfilenodes *) data; *************** ParseAbortRecord(uint8 info, xl_xact_abo *** 160,174 **** data += MinSizeOfXactRelfilenodes; data += xl_relfilenodes->nrels * sizeof(RelFileNode); } - - if (parsed->xinfo & XACT_XINFO_HAS_TWOPHASE) - { - xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data; - - parsed->twophase_xid = xl_twophase->xid; - - data += sizeof(xl_xact_twophase); - } } static void --- 169,174 ---- *************** xact_desc_commit(StringInfo buf, uint8 i *** 181,187 **** /* If this is a prepared xact, show the xid of the original xact */ if (TransactionIdIsValid(parsed.twophase_xid)) ! appendStringInfo(buf, "%u: ", parsed.twophase_xid); appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time)); --- 181,187 ---- /* If this is a prepared xact, show the xid of the original xact */ if (TransactionIdIsValid(parsed.twophase_xid)) ! appendStringInfo(buf, XID_FMT ": ", parsed.twophase_xid); appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time)); *************** xact_desc_commit(StringInfo buf, uint8 i *** 200,206 **** { appendStringInfoString(buf, "; subxacts:"); for (i = 0; i < parsed.nsubxacts; i++) ! appendStringInfo(buf, " %u", parsed.subxacts[i]); } if (parsed.nmsgs > 0) { --- 200,206 ---- { appendStringInfoString(buf, "; subxacts:"); for (i = 0; i < parsed.nsubxacts; i++) ! appendStringInfo(buf, " " XID_FMT, parsed.subxacts[i]); } if (parsed.nmsgs > 0) { *************** xact_desc_abort(StringInfo buf, uint8 in *** 232,238 **** /* If this is a prepared xact, show the xid of the original xact */ if (TransactionIdIsValid(parsed.twophase_xid)) ! 
appendStringInfo(buf, "%u: ", parsed.twophase_xid); appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time)); if (parsed.nrels > 0) --- 232,238 ---- /* If this is a prepared xact, show the xid of the original xact */ if (TransactionIdIsValid(parsed.twophase_xid)) ! appendStringInfo(buf, XID_FMT ": ", parsed.twophase_xid); appendStringInfoString(buf, timestamptz_to_str(xlrec->xact_time)); if (parsed.nrels > 0) *************** xact_desc_abort(StringInfo buf, uint8 in *** 251,257 **** { appendStringInfoString(buf, "; subxacts:"); for (i = 0; i < parsed.nsubxacts; i++) ! appendStringInfo(buf, " %u", parsed.subxacts[i]); } } --- 251,257 ---- { appendStringInfoString(buf, "; subxacts:"); for (i = 0; i < parsed.nsubxacts; i++) ! appendStringInfo(buf, " " XID_FMT, parsed.subxacts[i]); } } *************** xact_desc_assignment(StringInfo buf, xl_ *** 263,269 **** appendStringInfoString(buf, "subxacts:"); for (i = 0; i < xlrec->nsubxacts; i++) ! appendStringInfo(buf, " %u", xlrec->xsub[i]); } void --- 263,269 ---- appendStringInfoString(buf, "subxacts:"); for (i = 0; i < xlrec->nsubxacts; i++) ! appendStringInfo(buf, " " XID_FMT, xlrec->xsub[i]); } void *************** xact_desc(StringInfo buf, XLogReaderStat *** 294,300 **** * interested in the top-level xid that issued the record and which * xids are being reported here. */ ! appendStringInfo(buf, "xtop %u: ", xlrec->xtop); xact_desc_assignment(buf, xlrec); } } --- 294,300 ---- * interested in the top-level xid that issued the record and which * xids are being reported here. */ ! 
appendStringInfo(buf, "xtop " XID_FMT ": ", xlrec->xtop); xact_desc_assignment(buf, xlrec); } } diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c new file mode 100644 index f72f076..dfd06fe *** a/src/backend/access/rmgrdesc/xlogdesc.c --- b/src/backend/access/rmgrdesc/xlogdesc.c *************** xlog_desc(StringInfo buf, XLogReaderStat *** 44,58 **** CheckPoint *checkpoint = (CheckPoint *) rec; appendStringInfo(buf, "redo %X/%X; " ! "tli %u; prev tli %u; fpw %s; xid %u:%u; oid %u; multi %u; offset %u; " ! "oldest xid %u in DB %u; oldest multi %u in DB %u; " ! "oldest/newest commit timestamp xid: %u/%u; " ! "oldest running xid %u; %s", (uint32) (checkpoint->redo >> 32), (uint32) checkpoint->redo, checkpoint->ThisTimeLineID, checkpoint->PrevTimeLineID, checkpoint->fullPageWrites ? "true" : "false", ! checkpoint->nextXidEpoch, checkpoint->nextXid, checkpoint->nextOid, checkpoint->nextMulti, checkpoint->nextMultiOffset, --- 44,58 ---- CheckPoint *checkpoint = (CheckPoint *) rec; appendStringInfo(buf, "redo %X/%X; " ! "tli %u; prev tli %u; fpw %s; xid " XID_FMT "; oid %u; multi " XID_FMT "; offset " INT64_FORMAT "; " ! "oldest xid " XID_FMT " in DB %u; oldest multi " XID_FMT " in DB %u; " ! "oldest/newest commit timestamp xid: " XID_FMT "/" XID_FMT "; " ! "oldest running xid " XID_FMT "; %s", (uint32) (checkpoint->redo >> 32), (uint32) checkpoint->redo, checkpoint->ThisTimeLineID, checkpoint->PrevTimeLineID, checkpoint->fullPageWrites ? "true" : "false", ! checkpoint->nextXid, checkpoint->nextOid, checkpoint->nextMulti, checkpoint->nextMultiOffset, diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c new file mode 100644 index 9003b22..5575b94 *** a/src/backend/access/transam/clog.c --- b/src/backend/access/transam/clog.c *************** static SlruCtlData ClogCtlData; *** 88,101 **** #define ClogCtl (&ClogCtlData) ! static int ZeroCLOGPage(int pageno, bool writeXlog); ! 
static bool CLOGPagePrecedes(int page1, int page2); ! static void WriteZeroPageXlogRec(int pageno); ! static void WriteTruncateXlogRec(int pageno, TransactionId oldestXact, Oid oldestXidDb); static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, ! XLogRecPtr lsn, int pageno, bool all_xact_same_page); static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno); --- 88,100 ---- #define ClogCtl (&ClogCtlData) ! static int ZeroCLOGPage(int64 pageno, bool writeXlog); ! static void WriteZeroPageXlogRec(int64 pageno); ! static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXact, Oid oldestXidDb); static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, ! XLogRecPtr lsn, int64 pageno, bool all_xact_same_page); static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno); *************** void *** 163,169 **** TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, XLogRecPtr lsn) { ! int pageno = TransactionIdToPage(xid); /* get page of parent */ int i; Assert(status == TRANSACTION_STATUS_COMMITTED || --- 162,168 ---- TransactionIdSetTreeStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, XLogRecPtr lsn) { ! int64 pageno = TransactionIdToPage(xid); /* get page of parent */ int i; Assert(status == TRANSACTION_STATUS_COMMITTED || *************** static void *** 237,243 **** set_status_by_pages(int nsubxids, TransactionId *subxids, XidStatus status, XLogRecPtr lsn) { ! int pageno = TransactionIdToPage(subxids[0]); int offset = 0; int i = 0; --- 236,242 ---- set_status_by_pages(int nsubxids, TransactionId *subxids, XidStatus status, XLogRecPtr lsn) { ! 
int64 pageno = TransactionIdToPage(subxids[0]); int offset = 0; int i = 0; *************** set_status_by_pages(int nsubxids, Transa *** 266,272 **** static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, ! XLogRecPtr lsn, int pageno, bool all_xact_same_page) { /* Can't use group update when PGPROC overflows. */ --- 265,271 ---- static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, TransactionId *subxids, XidStatus status, ! XLogRecPtr lsn, int64 pageno, bool all_xact_same_page) { /* Can't use group update when PGPROC overflows. */ *************** TransactionIdSetStatusBit(TransactionId *** 634,640 **** XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn) { ! int pageno = TransactionIdToPage(xid); int byteno = TransactionIdToByte(xid); int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; int slotno; --- 633,639 ---- XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn) { ! int64 pageno = TransactionIdToPage(xid); int byteno = TransactionIdToByte(xid); int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; int slotno; *************** CLOGShmemSize(void) *** 691,697 **** void CLOGShmemInit(void) { - ClogCtl->PagePrecedes = CLOGPagePrecedes; SimpleLruInit(ClogCtl, "clog", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE, CLogControlLock, "pg_xact", LWTRANCHE_CLOG_BUFFERS); } --- 690,695 ---- *************** BootStrapCLOG(void) *** 729,735 **** * Control lock must be held at entry, and will be held at exit. */ static int ! ZeroCLOGPage(int pageno, bool writeXlog) { int slotno; --- 727,733 ---- * Control lock must be held at entry, and will be held at exit. */ static int ! ZeroCLOGPage(int64 pageno, bool writeXlog) { int slotno; *************** void *** 749,755 **** StartupCLOG(void) { TransactionId xid = ShmemVariableCache->nextXid; ! 
int pageno = TransactionIdToPage(xid); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); --- 747,753 ---- StartupCLOG(void) { TransactionId xid = ShmemVariableCache->nextXid; ! int64 pageno = TransactionIdToPage(xid); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); *************** void *** 768,774 **** TrimCLOG(void) { TransactionId xid = ShmemVariableCache->nextXid; ! int pageno = TransactionIdToPage(xid); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); --- 766,772 ---- TrimCLOG(void) { TransactionId xid = ShmemVariableCache->nextXid; ! int64 pageno = TransactionIdToPage(xid); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); *************** CheckPointCLOG(void) *** 860,866 **** void ExtendCLOG(TransactionId newestXact) { ! int pageno; /* * No work except at first XID of a page. But beware: just after --- 858,864 ---- void ExtendCLOG(TransactionId newestXact) { ! int64 pageno; /* * No work except at first XID of a page. But beware: just after *************** ExtendCLOG(TransactionId newestXact) *** 899,905 **** void TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid) { ! int cutoffPage; /* * The cutoff point is the start of the segment containing oldestXact. We --- 897,903 ---- void TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid) { ! int64 cutoffPage; /* * The cutoff point is the start of the segment containing oldestXact. We *************** TruncateCLOG(TransactionId oldestXact, O *** 932,970 **** SimpleLruTruncate(ClogCtl, cutoffPage); } - - /* - * Decide which of two CLOG page numbers is "older" for truncation purposes. - * - * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. However, if we are asked about - * page number zero, we don't want to hand InvalidTransactionId to - * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So, - * offset both xids by FirstNormalTransactionId to avoid that. 
- */ - static bool - CLOGPagePrecedes(int page1, int page2) - { - TransactionId xid1; - TransactionId xid2; - - xid1 = ((TransactionId) page1) * CLOG_XACTS_PER_PAGE; - xid1 += FirstNormalTransactionId; - xid2 = ((TransactionId) page2) * CLOG_XACTS_PER_PAGE; - xid2 += FirstNormalTransactionId; - - return TransactionIdPrecedes(xid1, xid2); - } - - /* * Write a ZEROPAGE xlog record */ static void ! WriteZeroPageXlogRec(int pageno) { XLogBeginInsert(); ! XLogRegisterData((char *) (&pageno), sizeof(int)); (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE); } --- 930,943 ---- SimpleLruTruncate(ClogCtl, cutoffPage); } /* * Write a ZEROPAGE xlog record */ static void ! WriteZeroPageXlogRec(int64 pageno) { XLogBeginInsert(); ! XLogRegisterData((char *) (&pageno), sizeof(int64)); (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE); } *************** WriteZeroPageXlogRec(int pageno) *** 975,981 **** * in TruncateCLOG(). */ static void ! WriteTruncateXlogRec(int pageno, TransactionId oldestXact, Oid oldestXactDb) { XLogRecPtr recptr; xl_clog_truncate xlrec; --- 948,954 ---- * in TruncateCLOG(). */ static void ! WriteTruncateXlogRec(int64 pageno, TransactionId oldestXact, Oid oldestXactDb) { XLogRecPtr recptr; xl_clog_truncate xlrec; *************** clog_redo(XLogReaderState *record) *** 1003,1012 **** if (info == CLOG_ZEROPAGE) { ! int pageno; int slotno; ! memcpy(&pageno, XLogRecGetData(record), sizeof(int)); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); --- 976,985 ---- if (info == CLOG_ZEROPAGE) { ! int64 pageno; int slotno; ! 
memcpy(&pageno, XLogRecGetData(record), sizeof(int64)); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c new file mode 100644 index 60fb9ee..ef32641 *** a/src/backend/access/transam/commit_ts.c --- b/src/backend/access/transam/commit_ts.c *************** bool track_commit_timestamp; *** 104,119 **** static void SetXidCommitTsInPage(TransactionId xid, int nsubxids, TransactionId *subxids, TimestampTz ts, ! RepOriginId nodeid, int pageno); static void TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts, RepOriginId nodeid, int slotno); static void error_commit_ts_disabled(void); ! static int ZeroCommitTsPage(int pageno, bool writeXlog); ! static bool CommitTsPagePrecedes(int page1, int page2); static void ActivateCommitTs(void); static void DeactivateCommitTs(void); ! static void WriteZeroPageXlogRec(int pageno); ! static void WriteTruncateXlogRec(int pageno, TransactionId oldestXid); static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids, TransactionId *subxids, TimestampTz timestamp, RepOriginId nodeid); --- 104,118 ---- static void SetXidCommitTsInPage(TransactionId xid, int nsubxids, TransactionId *subxids, TimestampTz ts, ! RepOriginId nodeid, int64 pageno); static void TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts, RepOriginId nodeid, int slotno); static void error_commit_ts_disabled(void); ! static int ZeroCommitTsPage(int64 pageno, bool writeXlog); static void ActivateCommitTs(void); static void DeactivateCommitTs(void); ! static void WriteZeroPageXlogRec(int64 pageno); ! static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXid); static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids, TransactionId *subxids, TimestampTz timestamp, RepOriginId nodeid); *************** TransactionTreeSetCommitTsData(Transacti *** 186,192 **** */ for (i = 0, headxid = xid;;) { ! 
int pageno = TransactionIdToCTsPage(headxid); int j; for (j = i; j < nsubxids; j++) --- 185,191 ---- */ for (i = 0, headxid = xid;;) { ! int64 pageno = TransactionIdToCTsPage(headxid); int j; for (j = i; j < nsubxids; j++) *************** TransactionTreeSetCommitTsData(Transacti *** 230,236 **** static void SetXidCommitTsInPage(TransactionId xid, int nsubxids, TransactionId *subxids, TimestampTz ts, ! RepOriginId nodeid, int pageno) { int slotno; int i; --- 229,235 ---- static void SetXidCommitTsInPage(TransactionId xid, int nsubxids, TransactionId *subxids, TimestampTz ts, ! RepOriginId nodeid, int64 pageno) { int slotno; int i; *************** bool *** 282,288 **** TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts, RepOriginId *nodeid) { ! int pageno = TransactionIdToCTsPage(xid); int entryno = TransactionIdToCTsEntry(xid); int slotno; CommitTimestampEntry entry; --- 281,287 ---- TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts, RepOriginId *nodeid) { ! int64 pageno = TransactionIdToCTsPage(xid); int entryno = TransactionIdToCTsEntry(xid); int slotno; CommitTimestampEntry entry; *************** TransactionIdGetCommitTsData(Transaction *** 292,298 **** if (!TransactionIdIsValid(xid)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ! errmsg("cannot retrieve commit timestamp for transaction %u", xid))); else if (!TransactionIdIsNormal(xid)) { /* frozen and bootstrap xids are always committed far in the past */ --- 291,297 ---- if (!TransactionIdIsValid(xid)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ! errmsg("cannot retrieve commit timestamp for transaction " XID_FMT, xid))); else if (!TransactionIdIsNormal(xid)) { /* frozen and bootstrap xids are always committed far in the past */ *************** error_commit_ts_disabled(void) *** 404,410 **** Datum pg_xact_commit_timestamp(PG_FUNCTION_ARGS) { ! 
TransactionId xid = PG_GETARG_UINT32(0); TimestampTz ts; bool found; --- 403,409 ---- Datum pg_xact_commit_timestamp(PG_FUNCTION_ARGS) { ! TransactionId xid = PG_GETARG_TRANSACTIONID(0); TimestampTz ts; bool found; *************** CommitTsShmemInit(void) *** 491,497 **** { bool found; - CommitTsCtl->PagePrecedes = CommitTsPagePrecedes; SimpleLruInit(CommitTsCtl, "commit_timestamp", CommitTsShmemBuffers(), 0, CommitTsControlLock, "pg_commit_ts", LWTRANCHE_COMMITTS_BUFFERS); --- 490,495 ---- *************** BootStrapCommitTs(void) *** 539,545 **** * Control lock must be held at entry, and will be held at exit. */ static int ! ZeroCommitTsPage(int pageno, bool writeXlog) { int slotno; --- 537,543 ---- * Control lock must be held at entry, and will be held at exit. */ static int ! ZeroCommitTsPage(int64 pageno, bool writeXlog) { int slotno; *************** static void *** 633,639 **** ActivateCommitTs(void) { TransactionId xid; ! int pageno; /* If we've done this already, there's nothing to do */ LWLockAcquire(CommitTsLock, LW_EXCLUSIVE); --- 631,637 ---- ActivateCommitTs(void) { TransactionId xid; ! int64 pageno; /* If we've done this already, there's nothing to do */ LWLockAcquire(CommitTsLock, LW_EXCLUSIVE); *************** CheckPointCommitTs(void) *** 784,790 **** void ExtendCommitTs(TransactionId newestXact) { ! int pageno; /* * Nothing to do if module not enabled. Note we do an unlocked read of --- 782,788 ---- void ExtendCommitTs(TransactionId newestXact) { ! int64 pageno; /* * Nothing to do if module not enabled. Note we do an unlocked read of *************** ExtendCommitTs(TransactionId newestXact) *** 822,828 **** void TruncateCommitTs(TransactionId oldestXact) { ! int cutoffPage; /* * The cutoff point is the start of the segment containing oldestXact. We --- 820,826 ---- void TruncateCommitTs(TransactionId oldestXact) { ! int64 cutoffPage; /* * The cutoff point is the start of the segment containing oldestXact. 
We *************** AdvanceOldestCommitTsXid(TransactionId o *** 884,920 **** /* - * Decide which of two CLOG page numbers is "older" for truncation purposes. - * - * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. However, if we are asked about - * page number zero, we don't want to hand InvalidTransactionId to - * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So, - * offset both xids by FirstNormalTransactionId to avoid that. - */ - static bool - CommitTsPagePrecedes(int page1, int page2) - { - TransactionId xid1; - TransactionId xid2; - - xid1 = ((TransactionId) page1) * COMMIT_TS_XACTS_PER_PAGE; - xid1 += FirstNormalTransactionId; - xid2 = ((TransactionId) page2) * COMMIT_TS_XACTS_PER_PAGE; - xid2 += FirstNormalTransactionId; - - return TransactionIdPrecedes(xid1, xid2); - } - - - /* * Write a ZEROPAGE xlog record */ static void ! WriteZeroPageXlogRec(int pageno) { XLogBeginInsert(); ! XLogRegisterData((char *) (&pageno), sizeof(int)); (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE); } --- 882,894 ---- /* * Write a ZEROPAGE xlog record */ static void ! WriteZeroPageXlogRec(int64 pageno) { XLogBeginInsert(); ! XLogRegisterData((char *) (&pageno), sizeof(int64)); (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE); } *************** WriteZeroPageXlogRec(int pageno) *** 922,928 **** * Write a TRUNCATE xlog record */ static void ! WriteTruncateXlogRec(int pageno, TransactionId oldestXid) { xl_commit_ts_truncate xlrec; --- 896,902 ---- * Write a TRUNCATE xlog record */ static void ! WriteTruncateXlogRec(int64 pageno, TransactionId oldestXid) { xl_commit_ts_truncate xlrec; *************** commit_ts_redo(XLogReaderState *record) *** 969,978 **** if (info == COMMIT_TS_ZEROPAGE) { ! int pageno; int slotno; ! memcpy(&pageno, XLogRecGetData(record), sizeof(int)); LWLockAcquire(CommitTsControlLock, LW_EXCLUSIVE); --- 943,952 ---- if (info == COMMIT_TS_ZEROPAGE) { ! 
int64 pageno; int slotno; ! memcpy(&pageno, XLogRecGetData(record), sizeof(int64)); LWLockAcquire(CommitTsControlLock, LW_EXCLUSIVE); diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c new file mode 100644 index 7142ece..20aba1a *** a/src/backend/access/transam/multixact.c --- b/src/backend/access/transam/multixact.c *************** *** 112,126 **** ((xid) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE) #define MultiXactIdToOffsetEntry(xid) \ ((xid) % (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE) ! #define MultiXactIdToOffsetSegment(xid) (MultiXactIdToOffsetPage(xid) / SLRU_PAGES_PER_SEGMENT) /* * The situation for members is a bit more complex: we store one byte of * additional flag bits for each TransactionId. To do this without getting ! * into alignment issues, we store four bytes of flags, and then the ! * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and ! * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups ! * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and * performance) trumps space efficiency here. * * Note that the "offset" macros work with byte offset, not array indexes, so --- 112,126 ---- ((xid) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE) #define MultiXactIdToOffsetEntry(xid) \ ((xid) % (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE) ! #define MultiXactIdToOffsetSegment(xid) ((uint64)(MultiXactIdToOffsetPage(xid) / SLRU_PAGES_PER_SEGMENT)) /* * The situation for members is a bit more complex: we store one byte of * additional flag bits for each TransactionId. To do this without getting ! * into alignment issues, we store eight bytes of flags, and then the ! * corresponding 8 Xids. Each such 9-word (72-byte) set we call a "group", and ! * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 113 groups ! * per page. This wastes 56 bytes per page, but that's OK -- simplicity (and * performance) trumps space efficiency here. 
* * Note that the "offset" macros work with byte offset, not array indexes, so *************** *** 132,138 **** #define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) /* how many full bytes of flags are there in a group? */ ! #define MULTIXACT_FLAGBYTES_PER_GROUP 4 #define MULTIXACT_MEMBERS_PER_MEMBERGROUP \ (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE) /* size in bytes of a complete group */ --- 132,138 ---- #define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) /* how many full bytes of flags are there in a group? */ ! #define MULTIXACT_FLAGBYTES_PER_GROUP 8 #define MULTIXACT_MEMBERS_PER_MEMBERGROUP \ (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE) /* size in bytes of a complete group */ *************** *** 142,163 **** #define MULTIXACT_MEMBERS_PER_PAGE \ (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP) - /* - * Because the number of items per page is not a divisor of the last item - * number (member 0xFFFFFFFF), the last segment does not use the maximum number - * of pages, and moreover the last used page therein does not use the same - * number of items as previous pages. (Another way to say it is that the - * 0xFFFFFFFF member is somewhere in the middle of the last page, so the page - * has some empty space after that item.) - * - * This constant is the number of members in the last page of the last segment. - */ - #define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \ - ((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1)) - /* page in which a member is to be found */ #define MXOffsetToMemberPage(xid) ((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE) ! 
#define MXOffsetToMemberSegment(xid) (MXOffsetToMemberPage(xid) / SLRU_PAGES_PER_SEGMENT) /* Location (byte offset within page) of flag word for a given member */ #define MXOffsetToFlagsOffset(xid) \ --- 142,150 ---- #define MULTIXACT_MEMBERS_PER_PAGE \ (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP) /* page in which a member is to be found */ #define MXOffsetToMemberPage(xid) ((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE) ! #define MXOffsetToMemberSegment(xid) ((uint64)(MXOffsetToMemberPage(xid) / SLRU_PAGES_PER_SEGMENT)) /* Location (byte offset within page) of flag word for a given member */ #define MXOffsetToFlagsOffset(xid) \ *************** typedef struct MultiXactStateData *** 216,237 **** MultiXactId oldestMultiXactId; Oid oldestMultiXactDB; - /* - * Oldest multixact offset that is potentially referenced by a multixact - * referenced by a relation. We don't always know this value, so there's - * a flag here to indicate whether or not we currently do. - */ - MultiXactOffset oldestOffset; - bool oldestOffsetKnown; - /* support for anti-wraparound measures */ MultiXactId multiVacLimit; - MultiXactId multiWarnLimit; - MultiXactId multiStopLimit; - MultiXactId multiWrapLimit; - - /* support for members anti-wraparound measures */ - MultiXactOffset offsetStopLimit; /* known if oldestOffsetKnown */ /* * Per-backend data starts here. We have two arrays stored in the area --- 203,210 ---- *************** static void mXactCachePut(MultiXactId mu *** 353,371 **** static char *mxstatus_to_string(MultiXactStatus status); /* management of SLRU infrastructure */ ! static int ZeroMultiXactOffsetPage(int pageno, bool writeXlog); ! static int ZeroMultiXactMemberPage(int pageno, bool writeXlog); ! static bool MultiXactOffsetPagePrecedes(int page1, int page2); ! static bool MultiXactMemberPagePrecedes(int page1, int page2); ! static bool MultiXactOffsetPrecedes(MultiXactOffset offset1, ! 
MultiXactOffset offset2); static void ExtendMultiXactOffset(MultiXactId multi); static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers); - static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary, - MultiXactOffset start, uint32 distance); - static bool SetOffsetVacuumLimit(bool is_startup); static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result); ! static void WriteMZeroPageXlogRec(int pageno, uint8 info); static void WriteMTruncateXlogRec(Oid oldestMultiDB, MultiXactId startOff, MultiXactId endOff, MultiXactOffset startMemb, MultiXactOffset endMemb); --- 326,337 ---- static char *mxstatus_to_string(MultiXactStatus status); /* management of SLRU infrastructure */ ! static int ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog); ! static int ZeroMultiXactMemberPage(int64 pageno, bool writeXlog); static void ExtendMultiXactOffset(MultiXactId multi); static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers); static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result); ! static void WriteMZeroPageXlogRec(int64 pageno, uint8 info); static void WriteMTruncateXlogRec(Oid oldestMultiDB, MultiXactId startOff, MultiXactId endOff, MultiXactOffset startMemb, MultiXactOffset endMemb); *************** MultiXactIdExpand(MultiXactId multi, Tra *** 449,455 **** /* MultiXactIdSetOldestMember() must have been called already. */ Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId])); ! debug_elog5(DEBUG2, "Expand: received multi %u, xid %u status %s", multi, xid, mxstatus_to_string(status)); /* --- 415,421 ---- /* MultiXactIdSetOldestMember() must have been called already. */ Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId])); ! 
debug_elog5(DEBUG2, "Expand: received multi " XID_FMT ", xid " XID_FMT " status %s", multi, xid, mxstatus_to_string(status)); /* *************** MultiXactIdExpand(MultiXactId multi, Tra *** 474,480 **** member.status = status; newMulti = MultiXactIdCreateFromMembers(1, &member); ! debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u", multi, newMulti); return newMulti; } --- 440,446 ---- member.status = status; newMulti = MultiXactIdCreateFromMembers(1, &member); ! debug_elog4(DEBUG2, "Expand: " XID_FMT " has no members, create singleton " XID_FMT, multi, newMulti); return newMulti; } *************** MultiXactIdExpand(MultiXactId multi, Tra *** 488,494 **** if (TransactionIdEquals(members[i].xid, xid) && (members[i].status == status)) { ! debug_elog4(DEBUG2, "Expand: %u is already a member of %u", xid, multi); pfree(members); return multi; --- 454,460 ---- if (TransactionIdEquals(members[i].xid, xid) && (members[i].status == status)) { ! debug_elog4(DEBUG2, "Expand: " XID_FMT " is already a member of " XID_FMT, xid, multi); pfree(members); return multi; *************** MultiXactIdExpand(MultiXactId multi, Tra *** 529,535 **** pfree(members); pfree(newMembers); ! debug_elog3(DEBUG2, "Expand: returning new multi %u", newMulti); return newMulti; } --- 495,501 ---- pfree(members); pfree(newMembers); ! debug_elog3(DEBUG2, "Expand: returning new multi " XID_FMT, newMulti); return newMulti; } *************** MultiXactIdIsRunning(MultiXactId multi, *** 552,558 **** int nmembers; int i; ! debug_elog3(DEBUG2, "IsRunning %u?", multi); /* * "false" here means we assume our callers have checked that the given --- 518,524 ---- int nmembers; int i; ! debug_elog3(DEBUG2, "IsRunning " XID_FMT "?", multi); /* * "false" here means we assume our callers have checked that the given *************** MultiXactIdIsRunning(MultiXactId multi, *** 592,598 **** { if (TransactionIdIsInProgress(members[i].xid)) { ! 
debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running", i, members[i].xid); pfree(members); return true; --- 558,564 ---- { if (TransactionIdIsInProgress(members[i].xid)) { ! debug_elog4(DEBUG2, "IsRunning: member %d (" XID_FMT ") is running", i, members[i].xid); pfree(members); return true; *************** MultiXactIdIsRunning(MultiXactId multi, *** 601,607 **** pfree(members); ! debug_elog3(DEBUG2, "IsRunning: %u is not running", multi); return false; } --- 567,573 ---- pfree(members); ! debug_elog3(DEBUG2, "IsRunning: " XID_FMT " is not running", multi); return false; } *************** MultiXactIdSetOldestMember(void) *** 655,661 **** LWLockRelease(MultiXactGenLock); ! debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = %u", MyBackendId, nextMXact); } } --- 621,627 ---- LWLockRelease(MultiXactGenLock); ! debug_elog4(DEBUG2, "MultiXact: setting OldestMember[%d] = " XID_FMT, MyBackendId, nextMXact); } } *************** MultiXactIdSetOldestVisible(void) *** 708,714 **** LWLockRelease(MultiXactGenLock); ! debug_elog4(DEBUG2, "MultiXact: setting OldestVisible[%d] = %u", MyBackendId, oldestMXact); } } --- 674,680 ---- LWLockRelease(MultiXactGenLock); ! debug_elog4(DEBUG2, "MultiXact: setting OldestVisible[%d] = " XID_FMT, MyBackendId, oldestMXact); } } *************** ReadNextMultiXactId(void) *** 727,735 **** mxid = MultiXactState->nextMXact; LWLockRelease(MultiXactGenLock); - if (mxid < FirstMultiXactId) - mxid = FirstMultiXactId; - return mxid; } --- 693,698 ---- *************** static void *** 841,848 **** RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, int nmembers, MultiXactMember *members) { ! int pageno; ! int prev_pageno; int entryno; int slotno; MultiXactOffset *offptr; --- 804,811 ---- RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, int nmembers, MultiXactMember *members) { ! int64 pageno; ! 
int64 prev_pageno; int entryno; int slotno; MultiXactOffset *offptr; *************** RecordNewMultiXact(MultiXactId multi, Mu *** 878,885 **** for (i = 0; i < nmembers; i++, offset++) { TransactionId *memberptr; ! uint32 *flagsptr; ! uint32 flagsval; int bshift; int flagsoff; int memberoff; --- 841,848 ---- for (i = 0; i < nmembers; i++, offset++) { TransactionId *memberptr; ! uint64 *flagsptr; ! uint64 flagsval; int bshift; int flagsoff; int memberoff; *************** RecordNewMultiXact(MultiXactId multi, Mu *** 900,913 **** memberptr = (TransactionId *) (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff); *memberptr = members[i].xid; ! flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); flagsval = *flagsptr; ! flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift); ! flagsval |= (members[i].status << bshift); *flagsptr = flagsval; MultiXactMemberCtl->shared->page_dirty[slotno] = true; --- 863,877 ---- memberptr = (TransactionId *) (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff); + *memberptr = members[i].xid; ! flagsptr = (uint64 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); flagsval = *flagsptr; ! flagsval &= ~((uint64)((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift); ! flagsval |= ((uint64)members[i].status << bshift); *flagsptr = flagsval; MultiXactMemberCtl->shared->page_dirty[slotno] = true; *************** GetNewMultiXactId(int nmembers, MultiXac *** 960,967 **** * If we're past multiVacLimit or the safe threshold for member storage * space, or we don't know what the safe threshold for member storage is, * start trying to force autovacuum cycles. - * If we're past multiWarnLimit, start issuing warnings. - * If we're past multiStopLimit, refuse to create new MultiXactIds. * * Note these are pretty much the same protections in GetNewTransactionId. 
*---------- --- 924,929 ---- *************** GetNewMultiXactId(int nmembers, MultiXac *** 975,1015 **** * possibility of deadlock while doing get_database_name(). First, * copy all the shared values we'll need in this path. */ - MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit; - MultiXactId multiStopLimit = MultiXactState->multiStopLimit; - MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit; - Oid oldest_datoid = MultiXactState->oldestMultiXactDB; - LWLockRelease(MultiXactGenLock); - if (IsUnderPostmaster && - !MultiXactIdPrecedes(result, multiStopLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* - * Immediately kick autovacuum into action as we're already in - * ERROR territory. - */ - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands that generate new MultiXactIds to avoid wraparound data loss in database \"%s\"", - oldest_datname), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions."))); - else - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands that generate new MultiXactIds to avoid wraparound data loss in database with OID %u", - oldest_datoid), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions."))); - } - /* * To avoid swamping the postmaster with signals, we issue the autovac * request only once per 64K multis generated. 
This still gives --- 937,944 ---- *************** GetNewMultiXactId(int nmembers, MultiXac *** 1018,1053 **** if (IsUnderPostmaster && (result % 65536) == 0) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - if (!MultiXactIdPrecedes(result, multiWarnLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(WARNING, - (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used", - "database \"%s\" must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - result, - oldest_datname, - multiWrapLimit - result), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions."))); - else - ereport(WARNING, - (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used", - "database with OID %u must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - result, - oldest_datoid, - multiWrapLimit - result), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions."))); - } - /* Re-acquire lock and start over */ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); result = MultiXactState->nextMXact; - if (result < FirstMultiXactId) - result = FirstMultiXactId; } /* Make sure there is room for the MXID in the file. */ --- 947,955 ---- *************** GetNewMultiXactId(int nmembers, MultiXac *** 1059,1143 **** * GetMultiXactIdMembers() for motivation. */ nextOffset = MultiXactState->nextOffset; ! if (nextOffset == 0) ! { ! *offset = 1; ! nmembers++; /* allocate member slot 0 too */ ! } ! else ! *offset = nextOffset; ! ! /*---------- ! * Protect against overrun of the members space as well, with the ! * following rules: ! * ! * If we're past offsetStopLimit, refuse to generate more multis. ! * If we're close to offsetStopLimit, emit a warning. ! * ! 
* Arbitrarily, we start emitting warnings when we're 20 segments or less ! * from offsetStopLimit. ! * ! * Note we haven't updated the shared state yet, so if we fail at this ! * point, the multixact ID we grabbed can still be used by the next guy. ! * ! * Note that there is no point in forcing autovacuum runs here: the ! * multixact freeze settings would have to be reduced for that to have any ! * effect. ! *---------- ! */ ! #define OFFSET_WARN_SEGMENTS 20 ! if (MultiXactState->oldestOffsetKnown && ! MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset, ! nmembers)) ! { ! /* see comment in the corresponding offsets wraparound case */ ! SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); ! ! ereport(ERROR, ! (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), ! errmsg("multixact \"members\" limit exceeded"), ! errdetail_plural("This command would create a multixact with %u members, but the remaining space is only enough for %u member.", ! "This command would create a multixact with %u members, but the remaining space is only enough for %u members.", ! MultiXactState->offsetStopLimit - nextOffset - 1, ! nmembers, ! MultiXactState->offsetStopLimit - nextOffset - 1), ! errhint("Execute a database-wide VACUUM in database with OID %u with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.", ! MultiXactState->oldestMultiXactDB))); ! } ! ! /* ! * Check whether we should kick autovacuum into action, to prevent members ! * wraparound. NB we use a much larger window to trigger autovacuum than ! * just the warning limit. The warning is just a measure of last resort - ! * this is in line with GetNewTransactionId's behaviour. ! */ ! if (!MultiXactState->oldestOffsetKnown || ! (MultiXactState->nextOffset - MultiXactState->oldestOffset ! > MULTIXACT_MEMBER_SAFE_THRESHOLD)) ! { ! /* ! * To avoid swamping the postmaster with signals, we issue the autovac ! * request only when crossing a segment boundary. With default ! 
* compilation settings that's roughly after 50k members. This still ! * gives plenty of chances before we get into real trouble. ! */ ! if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) != ! (MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT)) ! SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); ! } ! ! if (MultiXactState->oldestOffsetKnown && ! MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, ! nextOffset, ! nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS)) ! ereport(WARNING, ! (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), ! errmsg_plural("database with OID %u must be vacuumed before %d more multixact member is used", ! "database with OID %u must be vacuumed before %d more multixact members are used", ! MultiXactState->offsetStopLimit - nextOffset + nmembers, ! MultiXactState->oldestMultiXactDB, ! MultiXactState->offsetStopLimit - nextOffset + nmembers), ! errhint("Execute a database-wide VACUUM in that database with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings."))); ExtendMultiXactMember(nextOffset, nmembers); --- 961,967 ---- * GetMultiXactIdMembers() for motivation. */ nextOffset = MultiXactState->nextOffset; ! *offset = nextOffset; ExtendMultiXactMember(nextOffset, nmembers); *************** GetNewMultiXactId(int nmembers, MultiXac *** 1167,1173 **** LWLockRelease(MultiXactGenLock); ! debug_elog4(DEBUG2, "GetNew: returning %u offset %u", result, *offset); return result; } --- 991,997 ---- LWLockRelease(MultiXactGenLock); ! debug_elog4(DEBUG2, "GetNew: returning " XID_FMT " offset " INT64_FORMAT, result, *offset); return result; } *************** int *** 1202,1209 **** GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool onlyLock) { ! int pageno; ! 
int prev_pageno; int entryno; int slotno; MultiXactOffset *offptr; --- 1026,1033 ---- GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, bool from_pgupgrade, bool onlyLock) { ! int64 pageno; ! int64 prev_pageno; int entryno; int slotno; MultiXactOffset *offptr; *************** GetMultiXactIdMembers(MultiXactId multi, *** 1211,1223 **** int length; int truelength; int i; - MultiXactId oldestMXact; MultiXactId nextMXact; MultiXactId tmpMXact; MultiXactOffset nextOffset; MultiXactMember *ptr; ! debug_elog3(DEBUG2, "GetMembers: asked for %u", multi); if (!MultiXactIdIsValid(multi) || from_pgupgrade) return -1; --- 1035,1046 ---- int length; int truelength; int i; MultiXactId nextMXact; MultiXactId tmpMXact; MultiXactOffset nextOffset; MultiXactMember *ptr; ! debug_elog3(DEBUG2, "GetMembers: asked for " XID_FMT, multi); if (!MultiXactIdIsValid(multi) || from_pgupgrade) return -1; *************** GetMultiXactIdMembers(MultiXactId multi, *** 1264,1290 **** */ LWLockAcquire(MultiXactGenLock, LW_SHARED); - oldestMXact = MultiXactState->oldestMultiXactId; nextMXact = MultiXactState->nextMXact; nextOffset = MultiXactState->nextOffset; LWLockRelease(MultiXactGenLock); - if (MultiXactIdPrecedes(multi, oldestMXact)) - { - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("MultiXactId %u does no longer exist -- apparent wraparound", - multi))); - return -1; - } - - if (!MultiXactIdPrecedes(multi, nextMXact)) - ereport(ERROR, - (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("MultiXactId %u has not been created yet -- apparent wraparound", - multi))); - /* * Find out the offset at which we need to start reading MultiXactMembers * and the number of members in the multixact. 
We determine the latter as --- 1087,1097 ---- *************** retry: *** 1346,1355 **** { MultiXactOffset nextMXOffset; - /* handle wraparound if needed */ - if (tmpMXact < FirstMultiXactId) - tmpMXact = FirstMultiXactId; - prev_pageno = pageno; pageno = MultiXactIdToOffsetPage(tmpMXact); --- 1153,1158 ---- *************** retry: *** 1387,1393 **** for (i = 0; i < length; i++, offset++) { TransactionId *xactptr; ! uint32 *flagsptr; int flagsoff; int bshift; int memberoff; --- 1190,1196 ---- for (i = 0; i < length; i++, offset++) { TransactionId *xactptr; ! uint64 *flagsptr; int flagsoff; int bshift; int memberoff; *************** retry: *** 1413,1419 **** flagsoff = MXOffsetToFlagsOffset(offset); bshift = MXOffsetToFlagsBitShift(offset); ! flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); ptr[truelength].xid = *xactptr; ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK; --- 1216,1222 ---- flagsoff = MXOffsetToFlagsOffset(offset); bshift = MXOffsetToFlagsBitShift(offset); ! flagsptr = (uint64 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); ptr[truelength].xid = *xactptr; ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK; *************** mXactCacheGetBySet(int nmembers, MultiXa *** 1493,1499 **** */ if (memcmp(members, entry->members, nmembers * sizeof(MultiXactMember)) == 0) { ! debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi); dlist_move_head(&MXactCache, iter.cur); return entry->multi; } --- 1296,1302 ---- */ if (memcmp(members, entry->members, nmembers * sizeof(MultiXactMember)) == 0) { ! debug_elog3(DEBUG2, "CacheGet: found " XID_FMT, entry->multi); dlist_move_head(&MXactCache, iter.cur); return entry->multi; } *************** mXactCacheGetById(MultiXactId multi, Mul *** 1516,1522 **** { dlist_iter iter; ! debug_elog3(DEBUG2, "CacheGet: looking for %u", multi); dlist_foreach(iter, &MXactCache) { --- 1319,1325 ---- { dlist_iter iter; ! 
debug_elog3(DEBUG2, "CacheGet: looking for " XID_FMT, multi); dlist_foreach(iter, &MXactCache) { *************** mXactCachePut(MultiXactId multi, int nme *** 1597,1603 **** MXactCacheMembers--; entry = dlist_container(mXactCacheEnt, node, node); ! debug_elog3(DEBUG2, "CachePut: pruning cached multi %u", entry->multi); pfree(entry); --- 1400,1406 ---- MXactCacheMembers--; entry = dlist_container(mXactCacheEnt, node, node); ! debug_elog3(DEBUG2, "CachePut: pruning cached multi " XID_FMT, entry->multi); pfree(entry); *************** mxid_to_string(MultiXactId multi, int nm *** 1639,1649 **** initStringInfo(&buf); ! appendStringInfo(&buf, "%u %d[%u (%s)", multi, nmembers, members[0].xid, mxstatus_to_string(members[0].status)); for (i = 1; i < nmembers; i++) ! appendStringInfo(&buf, ", %u (%s)", members[i].xid, mxstatus_to_string(members[i].status)); appendStringInfoChar(&buf, ']'); --- 1442,1452 ---- initStringInfo(&buf); ! appendStringInfo(&buf, XID_FMT " %d[" XID_FMT " (%s)", multi, nmembers, members[0].xid, mxstatus_to_string(members[0].status)); for (i = 1; i < nmembers; i++) ! appendStringInfo(&buf, ", " XID_FMT " (%s)", members[i].xid, mxstatus_to_string(members[i].status)); appendStringInfoChar(&buf, ']'); *************** MultiXactShmemInit(void) *** 1823,1831 **** debug_elog2(DEBUG2, "Shared Memory Init for MultiXact"); - MultiXactOffsetCtl->PagePrecedes = MultiXactOffsetPagePrecedes; - MultiXactMemberCtl->PagePrecedes = MultiXactMemberPagePrecedes; - SimpleLruInit(MultiXactOffsetCtl, "multixact_offset", NUM_MXACTOFFSET_BUFFERS, 0, MultiXactOffsetControlLock, "pg_multixact/offsets", --- 1626,1631 ---- *************** BootStrapMultiXact(void) *** 1900,1906 **** * Control lock must be held at entry, and will be held at exit. */ static int ! ZeroMultiXactOffsetPage(int pageno, bool writeXlog) { int slotno; --- 1700,1706 ---- * Control lock must be held at entry, and will be held at exit. */ static int ! 
ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog) { int slotno; *************** ZeroMultiXactOffsetPage(int pageno, bool *** 1916,1922 **** * Ditto, for MultiXactMember */ static int ! ZeroMultiXactMemberPage(int pageno, bool writeXlog) { int slotno; --- 1716,1722 ---- * Ditto, for MultiXactMember */ static int ! ZeroMultiXactMemberPage(int64 pageno, bool writeXlog) { int slotno; *************** ZeroMultiXactMemberPage(int pageno, bool *** 1946,1952 **** static void MaybeExtendOffsetSlru(void) { ! int pageno; pageno = MultiXactIdToOffsetPage(MultiXactState->nextMXact); --- 1746,1752 ---- static void MaybeExtendOffsetSlru(void) { ! int64 pageno; pageno = MultiXactIdToOffsetPage(MultiXactState->nextMXact); *************** StartupMultiXact(void) *** 1981,1987 **** { MultiXactId multi = MultiXactState->nextMXact; MultiXactOffset offset = MultiXactState->nextOffset; ! int pageno; /* * Initialize offset's idea of the latest page number. --- 1781,1787 ---- { MultiXactId multi = MultiXactState->nextMXact; MultiXactOffset offset = MultiXactState->nextOffset; ! int64 pageno; /* * Initialize offset's idea of the latest page number. *************** TrimMultiXact(void) *** 2006,2012 **** MultiXactOffset offset; MultiXactId oldestMXact; Oid oldestMXactDB; ! int pageno; int entryno; int flagsoff; --- 1806,1812 ---- MultiXactOffset offset; MultiXactId oldestMXact; Oid oldestMXactDB; ! int64 pageno; int entryno; int flagsoff; *************** MultiXactGetCheckptMulti(bool is_shutdow *** 2129,2135 **** LWLockRelease(MultiXactGenLock); debug_elog6(DEBUG2, ! "MultiXact: checkpoint is nextMulti %u, nextOffset %u, oldestMulti %u in DB %u", *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB); } --- 1929,1935 ---- LWLockRelease(MultiXactGenLock); debug_elog6(DEBUG2, ! 
"MultiXact: checkpoint is nextMulti " XID_FMT ", nextOffset " INT64_FORMAT ", oldestMulti " XID_FMT " in DB %u", *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB); } *************** void *** 2160,2166 **** MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset) { ! debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %u", nextMulti, nextMultiOffset); LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); MultiXactState->nextMXact = nextMulti; --- 1960,1966 ---- MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset) { ! debug_elog4(DEBUG2, "MultiXact: setting next multi to " XID_FMT " offset " INT64_FORMAT, nextMulti, nextMultiOffset); LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); MultiXactState->nextMXact = nextMulti; *************** SetMultiXactIdLimit(MultiXactId oldest_d *** 2195,2246 **** bool is_startup) { MultiXactId multiVacLimit; - MultiXactId multiWarnLimit; - MultiXactId multiStopLimit; - MultiXactId multiWrapLimit; - MultiXactId curMulti; - bool needs_offset_vacuum; Assert(MultiXactIdIsValid(oldest_datminmxid)); /* - * We pretend that a wrap will happen halfway through the multixact ID - * space, but that's not really true, because multixacts wrap differently - * from transaction IDs. Note that, separately from any concern about - * multixact IDs wrapping, we must ensure that multixact members do not - * wrap. Limits for that are set in DetermineSafeOldestOffset, not here. - */ - multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1); - if (multiWrapLimit < FirstMultiXactId) - multiWrapLimit += FirstMultiXactId; - - /* - * We'll refuse to continue assigning MultiXactIds once we get within 100 - * multi of data loss. - * - * Note: This differs from the magic number used in - * SetTransactionIdLimit() since vacuum itself will never generate new - * multis. XXX actually it does, if it needs to freeze old multis. 
- */ - multiStopLimit = multiWrapLimit - 100; - if (multiStopLimit < FirstMultiXactId) - multiStopLimit -= FirstMultiXactId; - - /* - * We'll start complaining loudly when we get within 10M multis of the - * stop point. This is kind of arbitrary, but if you let your gas gauge - * get down to 1% of full, would you be looking for the next gas station? - * We need to be fairly liberal about this number because there are lots - * of scenarios where most transactions are done by automatic clients that - * won't pay attention to warnings. (No, we're not gonna make this - * configurable. If you know enough to configure it, you know enough to - * not get in this kind of trouble in the first place.) - */ - multiWarnLimit = multiStopLimit - 10000000; - if (multiWarnLimit < FirstMultiXactId) - multiWarnLimit -= FirstMultiXactId; - - /* * We'll start trying to force autovacuums when oldest_datminmxid gets to * be more than autovacuum_multixact_freeze_max_age mxids old. * --- 1995,2004 ---- *************** SetMultiXactIdLimit(MultiXactId oldest_d *** 2249,2273 **** * its value. See SetTransactionIdLimit. 
*/ multiVacLimit = oldest_datminmxid + autovacuum_multixact_freeze_max_age; - if (multiVacLimit < FirstMultiXactId) - multiVacLimit += FirstMultiXactId; /* Grab lock for just long enough to set the new limit values */ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); MultiXactState->oldestMultiXactId = oldest_datminmxid; MultiXactState->oldestMultiXactDB = oldest_datoid; MultiXactState->multiVacLimit = multiVacLimit; - MultiXactState->multiWarnLimit = multiWarnLimit; - MultiXactState->multiStopLimit = multiStopLimit; - MultiXactState->multiWrapLimit = multiWrapLimit; - curMulti = MultiXactState->nextMXact; LWLockRelease(MultiXactGenLock); - /* Log the info */ - ereport(DEBUG1, - (errmsg("MultiXactId wrap limit is %u, limited by database with OID %u", - multiWrapLimit, oldest_datoid))); - /* * Computing the actual limits is only possible once the data directory is * in a consistent state. There's no need to compute the limits while --- 2007,2020 ---- *************** SetMultiXactIdLimit(MultiXactId oldest_d *** 2279,2337 **** return; Assert(!InRecovery); - - /* Set limits for offset vacuum. */ - needs_offset_vacuum = SetOffsetVacuumLimit(is_startup); - - /* - * If past the autovacuum force point, immediately signal an autovac - * request. The reason for this is that autovac only processes one - * database per invocation. Once it's finished cleaning up the oldest - * database, it'll call here, and we'll signal the postmaster to start - * another iteration immediately if there are still any old databases. - */ - if ((MultiXactIdPrecedes(multiVacLimit, curMulti) || - needs_offset_vacuum) && IsUnderPostmaster) - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - /* Give an immediate warning if past the wrap warn point */ - if (MultiXactIdPrecedes(multiWarnLimit, curMulti)) - { - char *oldest_datname; - - /* - * We can be called when not inside a transaction, for example during - * StartupXLOG(). 
In such a case we cannot do database access, so we - * must just report the oldest DB's OID. - * - * Note: it's also possible that get_database_name fails and returns - * NULL, for example because the database just got dropped. We'll - * still warn, even though the warning might now be unnecessary. - */ - if (IsTransactionState()) - oldest_datname = get_database_name(oldest_datoid); - else - oldest_datname = NULL; - - if (oldest_datname) - ereport(WARNING, - (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used", - "database \"%s\" must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - curMulti, - oldest_datname, - multiWrapLimit - curMulti), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions."))); - else - ereport(WARNING, - (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used", - "database with OID %u must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - curMulti, - oldest_datoid, - multiWrapLimit - curMulti), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions."))); - } } /* --- 2026,2031 ---- *************** MultiXactAdvanceNextMXact(MultiXactId mi *** 2350,2361 **** LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); if (MultiXactIdPrecedes(MultiXactState->nextMXact, minMulti)) { ! debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", minMulti); MultiXactState->nextMXact = minMulti; } ! if (MultiXactOffsetPrecedes(MultiXactState->nextOffset, minMultiOffset)) { ! debug_elog3(DEBUG2, "MultiXact: setting next offset to %u", minMultiOffset); MultiXactState->nextOffset = minMultiOffset; } --- 2044,2055 ---- LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); if (MultiXactIdPrecedes(MultiXactState->nextMXact, minMulti)) { ! 
debug_elog3(DEBUG2, "MultiXact: setting next multi to " XID_FMT, minMulti); MultiXactState->nextMXact = minMulti; } ! if (MultiXactState->nextOffset < minMultiOffset) { ! debug_elog3(DEBUG2, "MultiXact: setting next offset to " INT64_FORMAT, minMultiOffset); MultiXactState->nextOffset = minMultiOffset; } *************** MultiXactAdvanceOldest(MultiXactId oldes *** 2388,2394 **** static void ExtendMultiXactOffset(MultiXactId multi) { ! int pageno; /* * No work except at first MultiXactId of a page. But beware: just after --- 2082,2088 ---- static void ExtendMultiXactOffset(MultiXactId multi) { ! int64 pageno; /* * No work except at first MultiXactId of a page. But beware: just after *************** ExtendMultiXactMember(MultiXactOffset of *** 2437,2443 **** flagsbit = MXOffsetToFlagsBitShift(offset); if (flagsoff == 0 && flagsbit == 0) { ! int pageno; pageno = MXOffsetToMemberPage(offset); --- 2131,2137 ---- flagsbit = MXOffsetToFlagsBitShift(offset); if (flagsoff == 0 && flagsbit == 0) { ! int64 pageno; pageno = MXOffsetToMemberPage(offset); *************** ExtendMultiXactMember(MultiXactOffset of *** 2449,2471 **** LWLockRelease(MultiXactMemberControlLock); } ! /* ! * Compute the number of items till end of current page. Careful: if ! * addition of unsigned ints wraps around, we're at the last page of ! * the last segment; since that page holds a different number of items ! * than other pages, we need to do it differently. ! */ ! if (offset + MAX_MEMBERS_IN_LAST_MEMBERS_PAGE < offset) ! { ! /* ! * This is the last page of the last segment; we can compute the ! * number of items left to allocate in it without modulo ! * arithmetic. ! */ ! difference = MaxMultiXactOffset - offset + 1; ! } ! else ! difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE; /* * Advance to next page, taking care to properly handle the wraparound --- 2143,2149 ---- LWLockRelease(MultiXactMemberControlLock); } ! 
difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE; /* * Advance to next page, taking care to properly handle the wraparound *************** GetOldestMultiXactId(void) *** 2530,2712 **** } /* - * Determine how aggressively we need to vacuum in order to prevent member - * wraparound. - * - * To do so determine what's the oldest member offset and install the limit - * info in MultiXactState, where it can be used to prevent overrun of old data - * in the members SLRU area. - * - * The return value is true if emergency autovacuum is required and false - * otherwise. - */ - static bool - SetOffsetVacuumLimit(bool is_startup) - { - MultiXactId oldestMultiXactId; - MultiXactId nextMXact; - MultiXactOffset oldestOffset = 0; /* placate compiler */ - MultiXactOffset prevOldestOffset; - MultiXactOffset nextOffset; - bool oldestOffsetKnown = false; - bool prevOldestOffsetKnown; - MultiXactOffset offsetStopLimit = 0; - MultiXactOffset prevOffsetStopLimit; - - /* - * NB: Have to prevent concurrent truncation, we might otherwise try to - * lookup a oldestMulti that's concurrently getting truncated away. - */ - LWLockAcquire(MultiXactTruncationLock, LW_SHARED); - - /* Read relevant fields from shared memory. */ - LWLockAcquire(MultiXactGenLock, LW_SHARED); - oldestMultiXactId = MultiXactState->oldestMultiXactId; - nextMXact = MultiXactState->nextMXact; - nextOffset = MultiXactState->nextOffset; - prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown; - prevOldestOffset = MultiXactState->oldestOffset; - prevOffsetStopLimit = MultiXactState->offsetStopLimit; - Assert(MultiXactState->finishedStartup); - LWLockRelease(MultiXactGenLock); - - /* - * Determine the offset of the oldest multixact. Normally, we can read - * the offset from the multixact itself, but there's an important special - * case: if there are no multixacts in existence at all, oldestMXact - * obviously can't point to one. 
It will instead point to the multixact - * ID that will be assigned the next time one is needed. - */ - if (oldestMultiXactId == nextMXact) - { - /* - * When the next multixact gets created, it will be stored at the next - * offset. - */ - oldestOffset = nextOffset; - oldestOffsetKnown = true; - } - else - { - /* - * Figure out where the oldest existing multixact's offsets are - * stored. Due to bugs in early release of PostgreSQL 9.3.X and 9.4.X, - * the supposedly-earliest multixact might not really exist. We are - * careful not to fail in that case. - */ - oldestOffsetKnown = - find_multixact_start(oldestMultiXactId, &oldestOffset); - - if (oldestOffsetKnown) - ereport(DEBUG1, - (errmsg("oldest MultiXactId member is at offset %u", - oldestOffset))); - else - ereport(LOG, - (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %u does not exist on disk", - oldestMultiXactId))); - } - - LWLockRelease(MultiXactTruncationLock); - - /* - * If we can, compute limits (and install them MultiXactState) to prevent - * overrun of old data in the members SLRU area. We can only do so if the - * oldest offset is known though. 
- */ - if (oldestOffsetKnown) - { - /* move back to start of the corresponding segment */ - offsetStopLimit = oldestOffset - (oldestOffset % - (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT)); - - /* always leave one segment before the wraparound point */ - offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT); - - if (!prevOldestOffsetKnown && !is_startup) - ereport(LOG, - (errmsg("MultiXact member wraparound protections are now enabled"))); - - ereport(DEBUG1, - (errmsg("MultiXact member stop limit is now %u based on MultiXact %u", - offsetStopLimit, oldestMultiXactId))); - } - else if (prevOldestOffsetKnown) - { - /* - * If we failed to get the oldest offset this time, but we have a - * value from a previous pass through this function, use the old - * values rather than automatically forcing an emergency autovacuum - * cycle again. - */ - oldestOffset = prevOldestOffset; - oldestOffsetKnown = true; - offsetStopLimit = prevOffsetStopLimit; - } - - /* Install the computed values */ - LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); - MultiXactState->oldestOffset = oldestOffset; - MultiXactState->oldestOffsetKnown = oldestOffsetKnown; - MultiXactState->offsetStopLimit = offsetStopLimit; - LWLockRelease(MultiXactGenLock); - - /* - * Do we need an emergency autovacuum? If we're not sure, assume yes. - */ - return !oldestOffsetKnown || - (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD); - } - - /* - * Return whether adding "distance" to "start" would move past "boundary". - * - * We use this to determine whether the addition is "wrapping around" the - * boundary point, hence the name. The reason we don't want to use the regular - * 2^31-modulo arithmetic here is that we want to be able to use the whole of - * the 2^32-1 space here, allowing for more multixacts that would fit - * otherwise. 
- */ - static bool - MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start, - uint32 distance) - { - MultiXactOffset finish; - - /* - * Note that offset number 0 is not used (see GetMultiXactIdMembers), so - * if the addition wraps around the UINT_MAX boundary, skip that value. - */ - finish = start + distance; - if (finish < start) - finish++; - - /*----------------------------------------------------------------------- - * When the boundary is numerically greater than the starting point, any - * value numerically between the two is not wrapped: - * - * <----S----B----> - * [---) = F wrapped past B (and UINT_MAX) - * [---) = F not wrapped - * [----] = F wrapped past B - * - * When the boundary is numerically less than the starting point (i.e. the - * UINT_MAX wraparound occurs somewhere in between) then all values in - * between are wrapped: - * - * <----B----S----> - * [---) = F not wrapped past B (but wrapped past UINT_MAX) - * [---) = F wrapped past B (and UINT_MAX) - * [----] = F not wrapped - *----------------------------------------------------------------------- - */ - if (start < boundary) - return finish >= boundary || finish < start; - else - return finish >= boundary && finish < start; - } - - /* * Find the starting offset of the given MultiXactId. * * Returns false if the file containing the multi does not exist on disk. --- 2208,2213 ---- *************** static bool *** 2719,2725 **** find_multixact_start(MultiXactId multi, MultiXactOffset *result) { MultiXactOffset offset; ! int pageno; int entryno; int slotno; MultiXactOffset *offptr; --- 2220,2226 ---- find_multixact_start(MultiXactId multi, MultiXactOffset *result) { MultiXactOffset offset; ! int64 pageno; int entryno; int slotno; MultiXactOffset *offptr; *************** find_multixact_start(MultiXactId multi, *** 2753,2849 **** return true; } - /* - * Determine how many multixacts, and how many multixact members, currently - * exist. Return false if unable to determine. 
- */ - static bool - ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members) - { - MultiXactOffset nextOffset; - MultiXactOffset oldestOffset; - MultiXactId oldestMultiXactId; - MultiXactId nextMultiXactId; - bool oldestOffsetKnown; - - LWLockAcquire(MultiXactGenLock, LW_SHARED); - nextOffset = MultiXactState->nextOffset; - oldestMultiXactId = MultiXactState->oldestMultiXactId; - nextMultiXactId = MultiXactState->nextMXact; - oldestOffset = MultiXactState->oldestOffset; - oldestOffsetKnown = MultiXactState->oldestOffsetKnown; - LWLockRelease(MultiXactGenLock); - - if (!oldestOffsetKnown) - return false; - - *members = nextOffset - oldestOffset; - *multixacts = nextMultiXactId - oldestMultiXactId; - return true; - } - - /* - * Multixact members can be removed once the multixacts that refer to them - * are older than every datminxmid. autovacuum_multixact_freeze_max_age and - * vacuum_multixact_freeze_table_age work together to make sure we never have - * too many multixacts; we hope that, at least under normal circumstances, - * this will also be sufficient to keep us from using too many offsets. - * However, if the average multixact has many members, we might exhaust the - * members space while still using few enough members that these limits fail - * to trigger full table scans for relminmxid advancement. At that point, - * we'd have no choice but to start failing multixact-creating operations - * with an error. - * - * To prevent that, if more than a threshold portion of the members space is - * used, we effectively reduce autovacuum_multixact_freeze_max_age and - * to a value just less than the number of multixacts in use. We hope that - * this will quickly trigger autovacuuming on the table or tables with the - * oldest relminmxid, thus allowing datminmxid values to advance and removing - * some members. - * - * As the fraction of the member space currently in use grows, we become - * more aggressive in clamping this value. 
That not only causes autovacuum - * to ramp up, but also makes any manual vacuums the user issues more - * aggressive. This happens because vacuum_set_xid_limits() clamps the - * freeze table and the minimum freeze age based on the effective - * autovacuum_multixact_freeze_max_age this function returns. In the worst - * case, we'll claim the freeze_max_age to zero, and every vacuum of any - * table will try to freeze every multixact. - * - * It's possible that these thresholds should be user-tunable, but for now - * we keep it simple. - */ - int - MultiXactMemberFreezeThreshold(void) - { - MultiXactOffset members; - uint32 multixacts; - uint32 victim_multixacts; - double fraction; - - /* If we can't determine member space utilization, assume the worst. */ - if (!ReadMultiXactCounts(&multixacts, &members)) - return 0; - - /* If member space utilization is low, no special action is required. */ - if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD) - return autovacuum_multixact_freeze_max_age; - - /* - * Compute a target for relminmxid advancement. The number of multixacts - * we try to eliminate from the system is based on how far we are past - * MULTIXACT_MEMBER_SAFE_THRESHOLD. - */ - fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) / - (MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD); - victim_multixacts = multixacts * fraction; - - /* fraction could be > 1.0, but lowest possible freeze age is zero */ - if (victim_multixacts > multixacts) - return 0; - return multixacts - victim_multixacts; - } - typedef struct mxtruncinfo { int earliestExistingPage; --- 2254,2259 ---- *************** typedef struct mxtruncinfo *** 2854,2865 **** * This callback determines the earliest existing page number. */ static bool ! SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int segpage, void *data) { mxtruncinfo *trunc = (mxtruncinfo *) data; if (trunc->earliestExistingPage == -1 || ! 
ctl->PagePrecedes(segpage, trunc->earliestExistingPage)) { trunc->earliestExistingPage = segpage; } --- 2264,2275 ---- * This callback determines the earliest existing page number. */ static bool ! SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data) { mxtruncinfo *trunc = (mxtruncinfo *) data; if (trunc->earliestExistingPage == -1 || ! segpage < trunc->earliestExistingPage) { trunc->earliestExistingPage = segpage; } *************** SlruScanDirCbFindEarliest(SlruCtl ctl, c *** 2879,2888 **** static void PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset) { ! const int maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset); ! int startsegment = MXOffsetToMemberSegment(oldestOffset); ! int endsegment = MXOffsetToMemberSegment(newOldestOffset); ! int segment = startsegment; /* * Delete all the segments but the last one. The last segment can still --- 2289,2298 ---- static void PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset) { ! const int64 maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset); ! int64 startsegment = MXOffsetToMemberSegment(oldestOffset); ! int64 endsegment = MXOffsetToMemberSegment(newOldestOffset); ! int64 segment = startsegment; /* * Delete all the segments but the last one. The last segment can still *************** PerformMembersTruncation(MultiXactOffset *** 2890,2896 **** */ while (segment != endsegment) { ! elog(DEBUG2, "truncating multixact members segment %x", segment); SlruDeleteSegment(MultiXactMemberCtl, segment); /* move to next segment, handling wraparound correctly */ --- 2300,2307 ---- */ while (segment != endsegment) { ! elog(DEBUG2, "truncating multixact members segment %04x%08x", ! 
(uint32) (segment >> 32), (uint32) segment); SlruDeleteSegment(MultiXactMemberCtl, segment); /* move to next segment, handling wraparound correctly */ *************** TruncateMultiXact(MultiXactId newOldestM *** 3008,3014 **** else if (!find_multixact_start(oldestMulti, &oldestOffset)) { ereport(LOG, ! (errmsg("oldest MultiXact %u not found, earliest MultiXact %u, skipping truncation", oldestMulti, earliest))); LWLockRelease(MultiXactTruncationLock); return; --- 2419,2425 ---- else if (!find_multixact_start(oldestMulti, &oldestOffset)) { ereport(LOG, ! (errmsg("oldest MultiXact " XID_FMT " not found, earliest MultiXact " XID_FMT ", skipping truncation", oldestMulti, earliest))); LWLockRelease(MultiXactTruncationLock); return; *************** TruncateMultiXact(MultiXactId newOldestM *** 3026,3046 **** else if (!find_multixact_start(newOldestMulti, &newOldestOffset)) { ereport(LOG, ! (errmsg("cannot truncate up to MultiXact %u because it does not exist on disk, skipping truncation", newOldestMulti))); LWLockRelease(MultiXactTruncationLock); return; } elog(DEBUG1, "performing multixact truncation: " ! "offsets [%u, %u), offsets segments [%x, %x), " ! "members [%u, %u), members segments [%x, %x)", oldestMulti, newOldestMulti, ! MultiXactIdToOffsetSegment(oldestMulti), ! MultiXactIdToOffsetSegment(newOldestMulti), oldestOffset, newOldestOffset, ! MXOffsetToMemberSegment(oldestOffset), ! MXOffsetToMemberSegment(newOldestOffset)); /* * Do truncation, and the WAL logging of the truncation, in a critical --- 2437,2463 ---- else if (!find_multixact_start(newOldestMulti, &newOldestOffset)) { ereport(LOG, ! (errmsg("cannot truncate up to MultiXact " XID_FMT " because it does not exist on disk, skipping truncation", newOldestMulti))); LWLockRelease(MultiXactTruncationLock); return; } elog(DEBUG1, "performing multixact truncation: " ! "offsets [" XID_FMT ", " XID_FMT "), " ! "offsets segments [%04x%08x, %04x%08x), " ! "members [" INT64_FORMAT ", " INT64_FORMAT "), " ! 
"members segments [%04x%08x, %04x%08x)", oldestMulti, newOldestMulti, ! (uint32) (MultiXactIdToOffsetSegment(oldestMulti) >> 32), ! (uint32) MultiXactIdToOffsetSegment(oldestMulti), ! (uint32) (MultiXactIdToOffsetSegment(newOldestMulti) >> 32), ! (uint32) MultiXactIdToOffsetSegment(newOldestMulti), oldestOffset, newOldestOffset, ! (uint32) (MXOffsetToMemberSegment(oldestOffset) >> 32), ! (uint32) MXOffsetToMemberSegment(oldestOffset), ! (uint32) (MXOffsetToMemberSegment(newOldestOffset) >> 32), ! (uint32) MXOffsetToMemberSegment(newOldestOffset)); /* * Do truncation, and the WAL logging of the truncation, in a critical *************** TruncateMultiXact(MultiXactId newOldestM *** 3090,3184 **** } /* - * Decide which of two MultiXactOffset page numbers is "older" for truncation - * purposes. - * - * We need to use comparison of MultiXactId here in order to do the right - * thing with wraparound. However, if we are asked about page number zero, we - * don't want to hand InvalidMultiXactId to MultiXactIdPrecedes: it'll get - * weird. So, offset both multis by FirstMultiXactId to avoid that. - * (Actually, the current implementation doesn't do anything weird with - * InvalidMultiXactId, but there's no harm in leaving this code like this.) - */ - static bool - MultiXactOffsetPagePrecedes(int page1, int page2) - { - MultiXactId multi1; - MultiXactId multi2; - - multi1 = ((MultiXactId) page1) * MULTIXACT_OFFSETS_PER_PAGE; - multi1 += FirstMultiXactId; - multi2 = ((MultiXactId) page2) * MULTIXACT_OFFSETS_PER_PAGE; - multi2 += FirstMultiXactId; - - return MultiXactIdPrecedes(multi1, multi2); - } - - /* - * Decide which of two MultiXactMember page numbers is "older" for truncation - * purposes. There is no "invalid offset number" so use the numbers verbatim. 
- */ - static bool - MultiXactMemberPagePrecedes(int page1, int page2) - { - MultiXactOffset offset1; - MultiXactOffset offset2; - - offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE; - offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE; - - return MultiXactOffsetPrecedes(offset1, offset2); - } - - /* - * Decide which of two MultiXactIds is earlier. - * - * XXX do we need to do something special for InvalidMultiXactId? - * (Doesn't look like it.) - */ - bool - MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2) - { - int32 diff = (int32) (multi1 - multi2); - - return (diff < 0); - } - - /* - * MultiXactIdPrecedesOrEquals -- is multi1 logically <= multi2? - * - * XXX do we need to do something special for InvalidMultiXactId? - * (Doesn't look like it.) - */ - bool - MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2) - { - int32 diff = (int32) (multi1 - multi2); - - return (diff <= 0); - } - - - /* - * Decide which of two offsets is earlier. - */ - static bool - MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2) - { - int32 diff = (int32) (offset1 - offset2); - - return (diff < 0); - } - - /* * Write an xlog record reflecting the zeroing of either a MEMBERs or * OFFSETs page (info shows which) */ static void ! WriteMZeroPageXlogRec(int pageno, uint8 info) { XLogBeginInsert(); ! XLogRegisterData((char *) (&pageno), sizeof(int)); (void) XLogInsert(RM_MULTIXACT_ID, info); } --- 2507,2520 ---- } /* * Write an xlog record reflecting the zeroing of either a MEMBERs or * OFFSETs page (info shows which) */ static void ! WriteMZeroPageXlogRec(int64 pageno, uint8 info) { XLogBeginInsert(); ! XLogRegisterData((char *) (&pageno), sizeof(int64)); (void) XLogInsert(RM_MULTIXACT_ID, info); } *************** multixact_redo(XLogReaderState *record) *** 3223,3232 **** if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE) { ! int pageno; int slotno; ! 
memcpy(&pageno, XLogRecGetData(record), sizeof(int)); LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE); --- 2559,2568 ---- if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE) { ! int64 pageno; int slotno; ! memcpy(&pageno, XLogRecGetData(record), sizeof(int64)); LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE); *************** multixact_redo(XLogReaderState *record) *** 3238,3247 **** } else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE) { ! int pageno; int slotno; ! memcpy(&pageno, XLogRecGetData(record), sizeof(int)); LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE); --- 2574,2583 ---- } else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE) { ! int64 pageno; int slotno; ! memcpy(&pageno, XLogRecGetData(record), sizeof(int64)); LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE); *************** multixact_redo(XLogReaderState *record) *** 3295,3314 **** else if (info == XLOG_MULTIXACT_TRUNCATE_ID) { xl_multixact_truncate xlrec; ! int pageno; memcpy(&xlrec, XLogRecGetData(record), SizeOfMultiXactTruncate); elog(DEBUG1, "replaying multixact truncation: " ! "offsets [%u, %u), offsets segments [%x, %x), " ! "members [%u, %u), members segments [%x, %x)", xlrec.startTruncOff, xlrec.endTruncOff, ! MultiXactIdToOffsetSegment(xlrec.startTruncOff), ! MultiXactIdToOffsetSegment(xlrec.endTruncOff), xlrec.startTruncMemb, xlrec.endTruncMemb, ! MXOffsetToMemberSegment(xlrec.startTruncMemb), ! MXOffsetToMemberSegment(xlrec.endTruncMemb)); /* should not be required, but more than cheap enough */ LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE); --- 2631,2656 ---- else if (info == XLOG_MULTIXACT_TRUNCATE_ID) { xl_multixact_truncate xlrec; ! int64 pageno; memcpy(&xlrec, XLogRecGetData(record), SizeOfMultiXactTruncate); elog(DEBUG1, "replaying multixact truncation: " ! "offsets [" XID_FMT ", " XID_FMT "), " ! "offsets segments [%04x%08x, %04x%08x), " ! "members [" INT64_FORMAT ", " INT64_FORMAT "), " ! 
"members segments [%04x%08x, %04x%08x)", xlrec.startTruncOff, xlrec.endTruncOff, ! (uint32) (MultiXactIdToOffsetSegment(xlrec.startTruncOff) >> 32), ! (uint32) MultiXactIdToOffsetSegment(xlrec.startTruncOff), ! (uint32) (MultiXactIdToOffsetSegment(xlrec.endTruncOff) >> 32), ! (uint32) MultiXactIdToOffsetSegment(xlrec.endTruncOff), xlrec.startTruncMemb, xlrec.endTruncMemb, ! (uint32) (MXOffsetToMemberSegment(xlrec.startTruncMemb) >> 32), ! (uint32) MXOffsetToMemberSegment(xlrec.startTruncMemb), ! (uint32) (MXOffsetToMemberSegment(xlrec.endTruncMemb) >> 32), ! (uint32) MXOffsetToMemberSegment(xlrec.endTruncMemb)); /* should not be required, but more than cheap enough */ LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE); *************** pg_get_multixact_members(PG_FUNCTION_ARG *** 3352,3358 **** if (mxid < FirstMultiXactId) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ! errmsg("invalid MultiXactId: %u", mxid))); if (SRF_IS_FIRSTCALL()) { --- 2694,2700 ---- if (mxid < FirstMultiXactId) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ! errmsg("invalid MultiXactId: " XID_FMT, mxid))); if (SRF_IS_FIRSTCALL()) { *************** pg_get_multixact_members(PG_FUNCTION_ARG *** 3388,3394 **** HeapTuple tuple; char *values[2]; ! values[0] = psprintf("%u", multi->members[multi->iter].xid); values[1] = mxstatus_to_string(multi->members[multi->iter].status); tuple = BuildTupleFromCStrings(funccxt->attinmeta, values); --- 2730,2736 ---- HeapTuple tuple; char *values[2]; ! 
values[0] = psprintf(XID_FMT, multi->members[multi->iter].xid); values[1] = mxstatus_to_string(multi->members[multi->iter].status); tuple = BuildTupleFromCStrings(funccxt->attinmeta, values); diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c new file mode 100644 index 77edc51..a120e46 *** a/src/backend/access/transam/slru.c --- b/src/backend/access/transam/slru.c *************** *** 58,67 **** #include "storage/fd.h" #include "storage/shmem.h" #include "miscadmin.h" #define SlruFileName(ctl, path, seg) \ ! snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg) /* * During SimpleLruFlush(), we will usually not need to write/fsync more --- 58,69 ---- #include "storage/fd.h" #include "storage/shmem.h" #include "miscadmin.h" + #include "utils/builtins.h" #define SlruFileName(ctl, path, seg) \ ! snprintf(path, MAXPGPATH, "%s/%04X%08X", (ctl)->Dir, \ ! (uint32) ((seg) >> 32), (uint32) ((seg) & (int64)0xFFFFFFFF)) /* * During SimpleLruFlush(), we will usually not need to write/fsync more *************** typedef struct SlruFlushData *** 76,82 **** { int num_files; /* # files actually open */ int fd[MAX_FLUSH_BUFFERS]; /* their FD's */ ! int segno[MAX_FLUSH_BUFFERS]; /* their log seg#s */ } SlruFlushData; typedef struct SlruFlushData *SlruFlush; --- 78,84 ---- { int num_files; /* # files actually open */ int fd[MAX_FLUSH_BUFFERS]; /* their FD's */ ! int64 segno[MAX_FLUSH_BUFFERS]; /* their log seg#s */ } SlruFlushData; typedef struct SlruFlushData *SlruFlush; *************** static int slru_errno; *** 127,140 **** static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno); static void SimpleLruWaitIO(SlruCtl ctl, int slotno); static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata); ! static bool SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno); ! static bool SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata); ! static void SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid); ! 
static int SlruSelectLRUPage(SlruCtl ctl, int pageno); static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, ! int segpage, void *data); static void SlruInternalDeleteSegment(SlruCtl ctl, char *filename); /* --- 129,142 ---- static void SimpleLruZeroLSNs(SlruCtl ctl, int slotno); static void SimpleLruWaitIO(SlruCtl ctl, int slotno); static void SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata); ! static bool SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno); ! static bool SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruFlush fdata); ! static void SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid); ! static int SlruSelectLRUPage(SlruCtl ctl, int64 pageno); static bool SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, ! int64 segpage, void *data); static void SlruInternalDeleteSegment(SlruCtl ctl, char *filename); /* *************** SimpleLruShmemSize(int nslots, int nlsns *** 151,157 **** sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */ sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */ sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */ ! sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */ sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */ sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */ --- 153,159 ---- sz += MAXALIGN(nslots * sizeof(char *)); /* page_buffer[] */ sz += MAXALIGN(nslots * sizeof(SlruPageStatus)); /* page_status[] */ sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */ ! sz += MAXALIGN(nslots * sizeof(int64)); /* page_number[] */ sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */ sz += MAXALIGN(nslots * sizeof(LWLockPadded)); /* buffer_locks[] */ *************** SimpleLruInit(SlruCtl ctl, const char *n *** 200,207 **** offset += MAXALIGN(nslots * sizeof(SlruPageStatus)); shared->page_dirty = (bool *) (ptr + offset); offset += MAXALIGN(nslots * sizeof(bool)); ! 
shared->page_number = (int *) (ptr + offset); ! offset += MAXALIGN(nslots * sizeof(int)); shared->page_lru_count = (int *) (ptr + offset); offset += MAXALIGN(nslots * sizeof(int)); --- 202,209 ---- offset += MAXALIGN(nslots * sizeof(SlruPageStatus)); shared->page_dirty = (bool *) (ptr + offset); offset += MAXALIGN(nslots * sizeof(bool)); ! shared->page_number = (int64 *) (ptr + offset); ! offset += MAXALIGN(nslots * sizeof(int64)); shared->page_lru_count = (int *) (ptr + offset); offset += MAXALIGN(nslots * sizeof(int)); *************** SimpleLruInit(SlruCtl ctl, const char *n *** 260,266 **** * Control lock must be held at entry, and will be held at exit. */ int ! SimpleLruZeroPage(SlruCtl ctl, int pageno) { SlruShared shared = ctl->shared; int slotno; --- 262,268 ---- * Control lock must be held at entry, and will be held at exit. */ int ! SimpleLruZeroPage(SlruCtl ctl, int64 pageno) { SlruShared shared = ctl->shared; int slotno; *************** SimpleLruWaitIO(SlruCtl ctl, int slotno) *** 372,378 **** * Control lock must be held at entry, and will be held at exit. */ int ! SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid) { SlruShared shared = ctl->shared; --- 374,380 ---- * Control lock must be held at entry, and will be held at exit. */ int ! SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid) { SlruShared shared = ctl->shared; *************** SimpleLruReadPage(SlruCtl ctl, int pagen *** 464,470 **** * It is unspecified whether the lock will be shared or exclusive. */ int ! SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid) { SlruShared shared = ctl->shared; int slotno; --- 466,472 ---- * It is unspecified whether the lock will be shared or exclusive. */ int ! 
SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, TransactionId xid) { SlruShared shared = ctl->shared; int slotno; *************** static void *** 507,513 **** SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) { SlruShared shared = ctl->shared; ! int pageno = shared->page_number[slotno]; bool ok; /* If a write is in progress, wait for it to finish */ --- 509,515 ---- SlruInternalWritePage(SlruCtl ctl, int slotno, SlruFlush fdata) { SlruShared shared = ctl->shared; ! int64 pageno = shared->page_number[slotno]; bool ok; /* If a write is in progress, wait for it to finish */ *************** SimpleLruWritePage(SlruCtl ctl, int slot *** 587,595 **** * large enough to contain the given page. */ bool ! SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno) { ! int segno = pageno / SLRU_PAGES_PER_SEGMENT; int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; int offset = rpageno * BLCKSZ; char path[MAXPGPATH]; --- 589,597 ---- * large enough to contain the given page. */ bool ! SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno) { ! int64 segno = pageno / SLRU_PAGES_PER_SEGMENT; int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; int offset = rpageno * BLCKSZ; char path[MAXPGPATH]; *************** SimpleLruDoesPhysicalPageExist(SlruCtl c *** 636,645 **** * read/write operations. We could cache one virtual file pointer ... */ static bool ! SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno) { SlruShared shared = ctl->shared; ! int segno = pageno / SLRU_PAGES_PER_SEGMENT; int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; int offset = rpageno * BLCKSZ; char path[MAXPGPATH]; --- 638,647 ---- * read/write operations. We could cache one virtual file pointer ... */ static bool ! SlruPhysicalReadPage(SlruCtl ctl, int64 pageno, int slotno) { SlruShared shared = ctl->shared; ! 
int64 segno = pageno / SLRU_PAGES_PER_SEGMENT; int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; int offset = rpageno * BLCKSZ; char path[MAXPGPATH]; *************** SlruPhysicalReadPage(SlruCtl ctl, int pa *** 716,725 **** * SimpleLruFlush. */ static bool ! SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata) { SlruShared shared = ctl->shared; ! int segno = pageno / SLRU_PAGES_PER_SEGMENT; int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; int offset = rpageno * BLCKSZ; char path[MAXPGPATH]; --- 718,727 ---- * SimpleLruFlush. */ static bool ! SlruPhysicalWritePage(SlruCtl ctl, int64 pageno, int slotno, SlruFlush fdata) { SlruShared shared = ctl->shared; ! int64 segno = pageno / SLRU_PAGES_PER_SEGMENT; int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; int offset = rpageno * BLCKSZ; char path[MAXPGPATH]; *************** SlruPhysicalWritePage(SlruCtl ctl, int p *** 890,898 **** * SlruPhysicalWritePage. Call this after cleaning up shared-memory state. */ static void ! SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid) { ! int segno = pageno / SLRU_PAGES_PER_SEGMENT; int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; int offset = rpageno * BLCKSZ; char path[MAXPGPATH]; --- 892,900 ---- * SlruPhysicalWritePage. Call this after cleaning up shared-memory state. */ static void ! SlruReportIOError(SlruCtl ctl, int64 pageno, TransactionId xid) { ! int64 segno = pageno / SLRU_PAGES_PER_SEGMENT; int rpageno = pageno % SLRU_PAGES_PER_SEGMENT; int offset = rpageno * BLCKSZ; char path[MAXPGPATH]; *************** SlruReportIOError(SlruCtl ctl, int pagen *** 904,944 **** case SLRU_OPEN_FAILED: ereport(ERROR, (errcode_for_file_access(), ! errmsg("could not access status of transaction %u", xid), errdetail("Could not open file \"%s\": %m.", path))); break; case SLRU_SEEK_FAILED: ereport(ERROR, (errcode_for_file_access(), ! 
errmsg("could not access status of transaction %u", xid), errdetail("Could not seek in file \"%s\" to offset %u: %m.", path, offset))); break; case SLRU_READ_FAILED: ereport(ERROR, (errcode_for_file_access(), ! errmsg("could not access status of transaction %u", xid), errdetail("Could not read from file \"%s\" at offset %u: %m.", path, offset))); break; case SLRU_WRITE_FAILED: ereport(ERROR, (errcode_for_file_access(), ! errmsg("could not access status of transaction %u", xid), errdetail("Could not write to file \"%s\" at offset %u: %m.", path, offset))); break; case SLRU_FSYNC_FAILED: ereport(ERROR, (errcode_for_file_access(), ! errmsg("could not access status of transaction %u", xid), errdetail("Could not fsync file \"%s\": %m.", path))); break; case SLRU_CLOSE_FAILED: ereport(ERROR, (errcode_for_file_access(), ! errmsg("could not access status of transaction %u", xid), errdetail("Could not close file \"%s\": %m.", path))); break; --- 906,946 ---- case SLRU_OPEN_FAILED: ereport(ERROR, (errcode_for_file_access(), ! errmsg("could not access status of transaction " XID_FMT, xid), errdetail("Could not open file \"%s\": %m.", path))); break; case SLRU_SEEK_FAILED: ereport(ERROR, (errcode_for_file_access(), ! errmsg("could not access status of transaction " XID_FMT, xid), errdetail("Could not seek in file \"%s\" to offset %u: %m.", path, offset))); break; case SLRU_READ_FAILED: ereport(ERROR, (errcode_for_file_access(), ! errmsg("could not access status of transaction " XID_FMT, xid), errdetail("Could not read from file \"%s\" at offset %u: %m.", path, offset))); break; case SLRU_WRITE_FAILED: ereport(ERROR, (errcode_for_file_access(), ! errmsg("could not access status of transaction " XID_FMT, xid), errdetail("Could not write to file \"%s\" at offset %u: %m.", path, offset))); break; case SLRU_FSYNC_FAILED: ereport(ERROR, (errcode_for_file_access(), ! 
errmsg("could not access status of transaction " XID_FMT, xid), errdetail("Could not fsync file \"%s\": %m.", path))); break; case SLRU_CLOSE_FAILED: ereport(ERROR, (errcode_for_file_access(), ! errmsg("could not access status of transaction " XID_FMT, xid), errdetail("Could not close file \"%s\": %m.", path))); break; *************** SlruReportIOError(SlruCtl ctl, int pagen *** 964,970 **** * Control lock must be held at entry, and will be held at exit. */ static int ! SlruSelectLRUPage(SlruCtl ctl, int pageno) { SlruShared shared = ctl->shared; --- 966,972 ---- * Control lock must be held at entry, and will be held at exit. */ static int ! SlruSelectLRUPage(SlruCtl ctl, int64 pageno) { SlruShared shared = ctl->shared; *************** SlruSelectLRUPage(SlruCtl ctl, int pagen *** 975,984 **** int cur_count; int bestvalidslot = 0; /* keep compiler quiet */ int best_valid_delta = -1; ! int best_valid_page_number = 0; /* keep compiler quiet */ int bestinvalidslot = 0; /* keep compiler quiet */ int best_invalid_delta = -1; ! int best_invalid_page_number = 0; /* keep compiler quiet */ /* See if page already has a buffer assigned */ for (slotno = 0; slotno < shared->num_slots; slotno++) --- 977,986 ---- int cur_count; int bestvalidslot = 0; /* keep compiler quiet */ int best_valid_delta = -1; ! int64 best_valid_page_number = 0; /* keep compiler quiet */ int bestinvalidslot = 0; /* keep compiler quiet */ int best_invalid_delta = -1; ! int64 best_invalid_page_number = 0; /* keep compiler quiet */ /* See if page already has a buffer assigned */ for (slotno = 0; slotno < shared->num_slots; slotno++) *************** SlruSelectLRUPage(SlruCtl ctl, int pagen *** 1019,1025 **** for (slotno = 0; slotno < shared->num_slots; slotno++) { int this_delta; ! int this_page_number; if (shared->page_status[slotno] == SLRU_PAGE_EMPTY) return slotno; --- 1021,1027 ---- for (slotno = 0; slotno < shared->num_slots; slotno++) { int this_delta; ! 
int64 this_page_number; if (shared->page_status[slotno] == SLRU_PAGE_EMPTY) return slotno; *************** SlruSelectLRUPage(SlruCtl ctl, int pagen *** 1043,1050 **** { if (this_delta > best_valid_delta || (this_delta == best_valid_delta && ! ctl->PagePrecedes(this_page_number, ! best_valid_page_number))) { bestvalidslot = slotno; best_valid_delta = this_delta; --- 1045,1051 ---- { if (this_delta > best_valid_delta || (this_delta == best_valid_delta && ! this_page_number < best_valid_page_number)) { bestvalidslot = slotno; best_valid_delta = this_delta; *************** SlruSelectLRUPage(SlruCtl ctl, int pagen *** 1055,1062 **** { if (this_delta > best_invalid_delta || (this_delta == best_invalid_delta && ! ctl->PagePrecedes(this_page_number, ! best_invalid_page_number))) { bestinvalidslot = slotno; best_invalid_delta = this_delta; --- 1056,1062 ---- { if (this_delta > best_invalid_delta || (this_delta == best_invalid_delta && ! this_page_number < best_invalid_page_number)) { bestinvalidslot = slotno; best_invalid_delta = this_delta; *************** SimpleLruFlush(SlruCtl ctl, bool allow_r *** 1106,1112 **** SlruShared shared = ctl->shared; SlruFlushData fdata; int slotno; ! int pageno = 0; int i; bool ok; --- 1106,1112 ---- SlruShared shared = ctl->shared; SlruFlushData fdata; int slotno; ! int64 pageno = 0; int i; bool ok; *************** SimpleLruFlush(SlruCtl ctl, bool allow_r *** 1166,1172 **** * Remove all segments before the one holding the passed page number */ void ! SimpleLruTruncate(SlruCtl ctl, int cutoffPage) { SlruShared shared = ctl->shared; int slotno; --- 1166,1172 ---- * Remove all segments before the one holding the passed page number */ void ! SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage) { SlruShared shared = ctl->shared; int slotno; *************** restart:; *** 1192,1198 **** * have already wrapped around, and proceeding with the truncation would * risk removing the current segment. */ ! 
if (ctl->PagePrecedes(shared->latest_page_number, cutoffPage)) { LWLockRelease(shared->ControlLock); ereport(LOG, --- 1192,1198 ---- * have already wrapped around, and proceeding with the truncation would * risk removing the current segment. */ ! if (shared->latest_page_number < cutoffPage) { LWLockRelease(shared->ControlLock); ereport(LOG, *************** restart:; *** 1205,1211 **** { if (shared->page_status[slotno] == SLRU_PAGE_EMPTY) continue; ! if (!ctl->PagePrecedes(shared->page_number[slotno], cutoffPage)) continue; /* --- 1205,1211 ---- { if (shared->page_status[slotno] == SLRU_PAGE_EMPTY) continue; ! if (shared->page_number[slotno] >= cutoffPage) continue; /* *************** SlruInternalDeleteSegment(SlruCtl ctl, c *** 1259,1265 **** * Delete an individual SLRU segment, identified by the segment number. */ void ! SlruDeleteSegment(SlruCtl ctl, int segno) { SlruShared shared = ctl->shared; int slotno; --- 1259,1265 ---- * Delete an individual SLRU segment, identified by the segment number. */ void ! SlruDeleteSegment(SlruCtl ctl, int64 segno) { SlruShared shared = ctl->shared; int slotno; *************** restart: *** 1305,1311 **** if (did_write) goto restart; ! snprintf(path, MAXPGPATH, "%s/%04X", ctl->Dir, segno); ereport(DEBUG2, (errmsg("removing file \"%s\"", path))); unlink(path); --- 1305,1311 ---- if (did_write) goto restart; ! SlruFileName(ctl, path, segno); ereport(DEBUG2, (errmsg("removing file \"%s\"", path))); unlink(path); *************** restart: *** 1319,1331 **** * containing the page passed as "data". */ bool ! SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int segpage, void *data) { ! int cutoffPage = *(int *) data; cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; ! if (ctl->PagePrecedes(segpage, cutoffPage)) return true; /* found one; don't iterate any more */ return false; /* keep going */ --- 1319,1331 ---- * containing the page passed as "data". */ bool ! 
SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, int64 segpage, void *data) { ! int64 cutoffPage = *(int64 *) data; cutoffPage -= cutoffPage % SLRU_PAGES_PER_SEGMENT; ! if (segpage < cutoffPage) return true; /* found one; don't iterate any more */ return false; /* keep going */ *************** SlruScanDirCbReportPresence(SlruCtl ctl, *** 1336,1346 **** * This callback deletes segments prior to the one passed in as "data". */ static bool ! SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int segpage, void *data) { ! int cutoffPage = *(int *) data; ! if (ctl->PagePrecedes(segpage, cutoffPage)) SlruInternalDeleteSegment(ctl, filename); return false; /* keep going */ --- 1336,1346 ---- * This callback deletes segments prior to the one passed in as "data". */ static bool ! SlruScanDirCbDeleteCutoff(SlruCtl ctl, char *filename, int64 segpage, void *data) { ! int64 cutoffPage = *(int64 *) data; ! if (segpage < cutoffPage) SlruInternalDeleteSegment(ctl, filename); return false; /* keep going */ *************** SlruScanDirCbDeleteCutoff(SlruCtl ctl, c *** 1351,1357 **** * This callback deletes all segments. */ bool ! SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data) { SlruInternalDeleteSegment(ctl, filename); --- 1351,1357 ---- * This callback deletes all segments. */ bool ! SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int64 segpage, void *data) { SlruInternalDeleteSegment(ctl, filename); *************** SlruScanDirectory(SlruCtl ctl, SlruScanC *** 1379,1386 **** bool retval = false; DIR *cldir; struct dirent *clde; ! int segno; ! int segpage; cldir = AllocateDir(ctl->Dir); while ((clde = ReadDir(cldir, ctl->Dir)) != NULL) --- 1379,1386 ---- bool retval = false; DIR *cldir; struct dirent *clde; ! int64 segno; ! int64 segpage; cldir = AllocateDir(ctl->Dir); while ((clde = ReadDir(cldir, ctl->Dir)) != NULL) *************** SlruScanDirectory(SlruCtl ctl, SlruScanC *** 1389,1398 **** len = strlen(clde->d_name); ! 
if ((len == 4 || len == 5 || len == 6) && strspn(clde->d_name, "0123456789ABCDEF") == len) { ! segno = (int) strtol(clde->d_name, NULL, 16); segpage = segno * SLRU_PAGES_PER_SEGMENT; elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s", --- 1389,1398 ---- len = strlen(clde->d_name); ! if ((len == 12 || len == 13 || len == 14) && strspn(clde->d_name, "0123456789ABCDEF") == len) { ! segno = pg_strtouint64(clde->d_name, NULL, 16); segpage = segno * SLRU_PAGES_PER_SEGMENT; elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s", diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c new file mode 100644 index f640661..cc9eee6 *** a/src/backend/access/transam/subtrans.c --- b/src/backend/access/transam/subtrans.c *************** static SlruCtlData SubTransCtlData; *** 63,70 **** #define SubTransCtl (&SubTransCtlData) ! static int ZeroSUBTRANSPage(int pageno); ! static bool SubTransPagePrecedes(int page1, int page2); /* --- 63,69 ---- #define SubTransCtl (&SubTransCtlData) ! static int ZeroSUBTRANSPage(int64 pageno); /* *************** static bool SubTransPagePrecedes(int pag *** 73,79 **** void SubTransSetParent(TransactionId xid, TransactionId parent) { ! int pageno = TransactionIdToPage(xid); int entryno = TransactionIdToEntry(xid); int slotno; TransactionId *ptr; --- 72,78 ---- void SubTransSetParent(TransactionId xid, TransactionId parent) { ! int64 pageno = TransactionIdToPage(xid); int entryno = TransactionIdToEntry(xid); int slotno; TransactionId *ptr; *************** SubTransSetParent(TransactionId xid, Tra *** 108,114 **** TransactionId SubTransGetParent(TransactionId xid) { ! int pageno = TransactionIdToPage(xid); int entryno = TransactionIdToEntry(xid); int slotno; TransactionId *ptr; --- 107,113 ---- TransactionId SubTransGetParent(TransactionId xid) { ! 
int64 pageno = TransactionIdToPage(xid); int entryno = TransactionIdToEntry(xid); int slotno; TransactionId *ptr; *************** SubTransGetTopmostTransaction(Transactio *** 168,174 **** * structure that could lead to an infinite loop, so exit. */ if (!TransactionIdPrecedes(parentXid, previousXid)) ! elog(ERROR, "pg_subtrans contains invalid entry: xid %u points to parent xid %u", previousXid, parentXid); } --- 167,173 ---- * structure that could lead to an infinite loop, so exit. */ if (!TransactionIdPrecedes(parentXid, previousXid)) ! elog(ERROR, "pg_subtrans contains invalid entry: xid " XID_FMT " points to parent xid " XID_FMT, previousXid, parentXid); } *************** SUBTRANSShmemSize(void) *** 190,196 **** void SUBTRANSShmemInit(void) { - SubTransCtl->PagePrecedes = SubTransPagePrecedes; SimpleLruInit(SubTransCtl, "subtrans", NUM_SUBTRANS_BUFFERS, 0, SubtransControlLock, "pg_subtrans", LWTRANCHE_SUBTRANS_BUFFERS); --- 189,194 ---- *************** BootStrapSUBTRANS(void) *** 234,240 **** * Control lock must be held at entry, and will be held at exit. */ static int ! ZeroSUBTRANSPage(int pageno) { return SimpleLruZeroPage(SubTransCtl, pageno); } --- 232,238 ---- * Control lock must be held at entry, and will be held at exit. */ static int ! ZeroSUBTRANSPage(int64 pageno) { return SimpleLruZeroPage(SubTransCtl, pageno); } *************** ZeroSUBTRANSPage(int pageno) *** 249,256 **** void StartupSUBTRANS(TransactionId oldestActiveXID) { ! int startPage; ! int endPage; /* * Since we don't expect pg_subtrans to be valid across crashes, we --- 247,254 ---- void StartupSUBTRANS(TransactionId oldestActiveXID) { ! int64 startPage; ! 
int64 endPage; /* * Since we don't expect pg_subtrans to be valid across crashes, we *************** StartupSUBTRANS(TransactionId oldestActi *** 267,275 **** { (void) ZeroSUBTRANSPage(startPage); startPage++; - /* must account for wraparound */ - if (startPage > TransactionIdToPage(MaxTransactionId)) - startPage = 0; } (void) ZeroSUBTRANSPage(startPage); --- 265,270 ---- *************** CheckPointSUBTRANS(void) *** 323,329 **** void ExtendSUBTRANS(TransactionId newestXact) { ! int pageno; /* * No work except at first XID of a page. But beware: just after --- 318,324 ---- void ExtendSUBTRANS(TransactionId newestXact) { ! int64 pageno; /* * No work except at first XID of a page. But beware: just after *************** ExtendSUBTRANS(TransactionId newestXact) *** 353,360 **** void TruncateSUBTRANS(TransactionId oldestXact) { - int cutoffPage; - /* * The cutoff point is the start of the segment containing oldestXact. We * pass the *page* containing oldestXact to SimpleLruTruncate. We step --- 348,353 ---- *************** TruncateSUBTRANS(TransactionId oldestXac *** 363,394 **** * a page and oldestXact == next XID. In that case, if we didn't subtract * one, we'd trigger SimpleLruTruncate's wraparound detection. */ ! TransactionIdRetreat(oldestXact); ! cutoffPage = TransactionIdToPage(oldestXact); ! ! SimpleLruTruncate(SubTransCtl, cutoffPage); ! } ! ! ! /* ! * Decide which of two SUBTRANS page numbers is "older" for truncation purposes. ! * ! * We need to use comparison of TransactionIds here in order to do the right ! * thing with wraparound XID arithmetic. However, if we are asked about ! * page number zero, we don't want to hand InvalidTransactionId to ! * TransactionIdPrecedes: it'll get weird about permanent xact IDs. So, ! * offset both xids by FirstNormalTransactionId to avoid that. ! */ ! static bool ! SubTransPagePrecedes(int page1, int page2) ! { ! TransactionId xid1; ! TransactionId xid2; ! ! xid1 = ((TransactionId) page1) * SUBTRANS_XACTS_PER_PAGE; ! 
xid1 += FirstNormalTransactionId; ! xid2 = ((TransactionId) page2) * SUBTRANS_XACTS_PER_PAGE; ! xid2 += FirstNormalTransactionId; ! return TransactionIdPrecedes(xid1, xid2); } --- 356,371 ---- * a page and oldestXact == next XID. In that case, if we didn't subtract * one, we'd trigger SimpleLruTruncate's wraparound detection. */ ! if (oldestXact > FirstNormalTransactionId) ! { ! int64 cutoffPage; ! TransactionIdRetreat(oldestXact); ! cutoffPage = TransactionIdToPage(oldestXact); ! SimpleLruTruncate(SubTransCtl, cutoffPage); ! } ! else ! { ! SimpleLruTruncate(SubTransCtl, 0); ! } } diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c new file mode 100644 index 968b232..36ca4ff *** a/src/backend/access/transam/transam.c --- b/src/backend/access/transam/transam.c *************** TransactionIdDidCommit(TransactionId tra *** 157,163 **** parentXid = SubTransGetParent(transactionId); if (!TransactionIdIsValid(parentXid)) { ! elog(WARNING, "no pg_subtrans entry for subcommitted XID %u", transactionId); return false; } --- 157,163 ---- parentXid = SubTransGetParent(transactionId); if (!TransactionIdIsValid(parentXid)) { ! elog(WARNING, "no pg_subtrans entry for subcommitted XID " XID_FMT, transactionId); return false; } *************** TransactionIdDidAbort(TransactionId tran *** 206,212 **** if (!TransactionIdIsValid(parentXid)) { /* see notes in TransactionIdDidCommit */ ! elog(WARNING, "no pg_subtrans entry for subcommitted XID %u", transactionId); return true; } --- 206,212 ---- if (!TransactionIdIsValid(parentXid)) { /* see notes in TransactionIdDidCommit */ ! elog(WARNING, "no pg_subtrans entry for subcommitted XID " XID_FMT, transactionId); return true; } *************** TransactionIdAbortTree(TransactionId xid *** 293,362 **** TRANSACTION_STATUS_ABORTED, InvalidXLogRecPtr); } - /* - * TransactionIdPrecedes --- is id1 logically < id2? 
- */ - bool - TransactionIdPrecedes(TransactionId id1, TransactionId id2) - { - /* - * If either ID is a permanent XID then we can just do unsigned - * comparison. If both are normal, do a modulo-2^32 comparison. - */ - int32 diff; - - if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) - return (id1 < id2); - - diff = (int32) (id1 - id2); - return (diff < 0); - } - - /* - * TransactionIdPrecedesOrEquals --- is id1 logically <= id2? - */ - bool - TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2) - { - int32 diff; - - if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) - return (id1 <= id2); - - diff = (int32) (id1 - id2); - return (diff <= 0); - } - - /* - * TransactionIdFollows --- is id1 logically > id2? - */ - bool - TransactionIdFollows(TransactionId id1, TransactionId id2) - { - int32 diff; - - if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) - return (id1 > id2); - - diff = (int32) (id1 - id2); - return (diff > 0); - } - - /* - * TransactionIdFollowsOrEquals --- is id1 logically >= id2? - */ - bool - TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2) - { - int32 diff; - - if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) - return (id1 >= id2); - - diff = (int32) (id1 - id2); - return (diff >= 0); - } - /* * TransactionIdLatest --- get latest XID among a main xact and its children --- 293,298 ---- diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c new file mode 100644 index ae83291..d3983ed *** a/src/backend/access/transam/twophase.c --- b/src/backend/access/transam/twophase.c *************** TwoPhaseGetGXact(TransactionId xid) *** 836,842 **** LWLockRelease(TwoPhaseStateLock); if (result == NULL) /* should not happen */ ! elog(ERROR, "failed to find GlobalTransaction for xid %u", xid); cached_xid = xid; cached_gxact = result; --- 836,842 ---- LWLockRelease(TwoPhaseStateLock); if (result == NULL) /* should not happen */ ! 
elog(ERROR, "failed to find GlobalTransaction for xid " XID_FMT, xid); cached_xid = xid; cached_gxact = result; *************** TwoPhaseGetDummyProc(TransactionId xid) *** 877,883 **** /************************************************************************/ #define TwoPhaseFilePath(path, xid) \ ! snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X", xid) /* * 2PC state file format: --- 877,883 ---- /************************************************************************/ #define TwoPhaseFilePath(path, xid) \ ! snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X%08X", (uint32) ((xid) >> 32), (uint32) (xid)) /* * 2PC state file format: *************** restoreTwoPhaseData(void) *** 1739,1751 **** LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL) { ! if (strlen(clde->d_name) == 8 && ! strspn(clde->d_name, "0123456789ABCDEF") == 8) { TransactionId xid; char *buf; ! xid = (TransactionId) strtoul(clde->d_name, NULL, 16); buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr, true, false, false); --- 1739,1751 ---- LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL) { ! if (strlen(clde->d_name) == 16 && ! strspn(clde->d_name, "0123456789ABCDEF") == 16) { TransactionId xid; char *buf; ! xid = (TransactionId) pg_strtouint64(clde->d_name, NULL, 16); buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr, true, false, false); *************** RecoverPreparedTransactions(void) *** 1941,1947 **** continue; ereport(LOG, ! (errmsg("recovering prepared transaction %u from shared memory", xid))); hdr = (TwoPhaseFileHeader *) buf; Assert(TransactionIdEquals(hdr->xid, xid)); --- 1941,1947 ---- continue; ereport(LOG, ! (errmsg("recovering prepared transaction " XID_FMT " from shared memory", xid))); hdr = (TwoPhaseFileHeader *) buf; Assert(TransactionIdEquals(hdr->xid, xid)); *************** ProcessTwoPhaseBuffer(TransactionId xid, *** 2031,2044 **** if (fromdisk) { ereport(WARNING, ! 
(errmsg("removing stale two-phase state file for transaction %u", xid))); RemoveTwoPhaseFile(xid, true); } else { ereport(WARNING, ! (errmsg("removing stale two-phase state from memory for transaction %u", xid))); PrepareRedoRemove(xid, true); } --- 2031,2044 ---- if (fromdisk) { ereport(WARNING, ! (errmsg("removing stale two-phase state file for transaction " XID_FMT, xid))); RemoveTwoPhaseFile(xid, true); } else { ereport(WARNING, ! (errmsg("removing stale two-phase state from memory for transaction " XID_FMT, xid))); PrepareRedoRemove(xid, true); } *************** ProcessTwoPhaseBuffer(TransactionId xid, *** 2051,2064 **** if (fromdisk) { ereport(WARNING, ! (errmsg("removing future two-phase state file for transaction %u", xid))); RemoveTwoPhaseFile(xid, true); } else { ereport(WARNING, ! (errmsg("removing future two-phase state from memory for transaction %u", xid))); PrepareRedoRemove(xid, true); } --- 2051,2064 ---- if (fromdisk) { ereport(WARNING, ! (errmsg("removing future two-phase state file for transaction " XID_FMT, xid))); RemoveTwoPhaseFile(xid, true); } else { ereport(WARNING, ! (errmsg("removing future two-phase state from memory for transaction " XID_FMT, xid))); PrepareRedoRemove(xid, true); } *************** ProcessTwoPhaseBuffer(TransactionId xid, *** 2072,2078 **** if (buf == NULL) { ereport(WARNING, ! (errmsg("removing corrupt two-phase state file for transaction %u", xid))); RemoveTwoPhaseFile(xid, true); return NULL; --- 2072,2078 ---- if (buf == NULL) { ereport(WARNING, ! (errmsg("removing corrupt two-phase state file for transaction " XID_FMT, xid))); RemoveTwoPhaseFile(xid, true); return NULL; *************** ProcessTwoPhaseBuffer(TransactionId xid, *** 2091,2104 **** if (fromdisk) { ereport(WARNING, ! (errmsg("removing corrupt two-phase state file for transaction %u", xid))); RemoveTwoPhaseFile(xid, true); } else { ereport(WARNING, ! 
(errmsg("removing corrupt two-phase state from memory for transaction %u", xid))); PrepareRedoRemove(xid, true); } --- 2091,2104 ---- if (fromdisk) { ereport(WARNING, ! (errmsg("removing corrupt two-phase state file for transaction " XID_FMT, xid))); RemoveTwoPhaseFile(xid, true); } else { ereport(WARNING, ! (errmsg("removing corrupt two-phase state from memory for transaction " XID_FMT, xid))); PrepareRedoRemove(xid, true); } *************** RecordTransactionAbortPrepared(Transacti *** 2264,2270 **** * RecordTransactionCommitPrepared ... */ if (TransactionIdDidCommit(xid)) ! elog(PANIC, "cannot abort transaction %u, it was already committed", xid); START_CRIT_SECTION(); --- 2264,2270 ---- * RecordTransactionCommitPrepared ... */ if (TransactionIdDidCommit(xid)) ! elog(PANIC, "cannot abort transaction " XID_FMT ", it was already committed", xid); START_CRIT_SECTION(); *************** PrepareRedoAdd(char *buf, XLogRecPtr sta *** 2358,2364 **** Assert(TwoPhaseState->numPrepXacts < max_prepared_xacts); TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts++] = gxact; ! elog(DEBUG2, "added 2PC data in shared memory for transaction %u", gxact->xid); } /* --- 2358,2364 ---- Assert(TwoPhaseState->numPrepXacts < max_prepared_xacts); TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts++] = gxact; ! elog(DEBUG2, "added 2PC data in shared memory for transaction " XID_FMT, gxact->xid); } /* *************** PrepareRedoRemove(TransactionId xid, boo *** 2401,2407 **** /* * And now we can clean up any files we may have left. */ ! elog(DEBUG2, "removing 2PC data for transaction %u", xid); if (gxact->ondisk) RemoveTwoPhaseFile(xid, giveWarning); RemoveGXact(gxact); --- 2401,2407 ---- /* * And now we can clean up any files we may have left. */ ! 
elog(DEBUG2, "removing 2PC data for transaction " XID_FMT, xid); if (gxact->ondisk) RemoveTwoPhaseFile(xid, giveWarning); RemoveGXact(gxact); diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c new file mode 100644 index 702c8c9..2d5706f *** a/src/backend/access/transam/varsup.c --- b/src/backend/access/transam/varsup.c *************** GetNewTransactionId(bool isSubXact) *** 97,107 **** * possibility of deadlock while doing get_database_name(). First, * copy all the shared values we'll need in this path. */ - TransactionId xidWarnLimit = ShmemVariableCache->xidWarnLimit; - TransactionId xidStopLimit = ShmemVariableCache->xidStopLimit; - TransactionId xidWrapLimit = ShmemVariableCache->xidWrapLimit; - Oid oldest_datoid = ShmemVariableCache->oldestXidDB; - LWLockRelease(XidGenLock); /* --- 97,102 ---- *************** GetNewTransactionId(bool isSubXact) *** 112,159 **** if (IsUnderPostmaster && (xid % 65536) == 0) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - if (IsUnderPostmaster && - TransactionIdFollowsOrEquals(xid, xidStopLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands to avoid wraparound data loss in database \"%s\"", - oldest_datname), - errhint("Stop the postmaster and vacuum that database in single-user mode.\n" - "You might also need to commit or roll back old prepared transactions."))); - else - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands to avoid wraparound data loss in database with OID %u", - oldest_datoid), - errhint("Stop the postmaster and vacuum that database in single-user mode.\n" - "You might also need to commit or roll back old prepared transactions."))); - } - else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit)) - { - char *oldest_datname 
= get_database_name(oldest_datoid); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(WARNING, - (errmsg("database \"%s\" must be vacuumed within %u transactions", - oldest_datname, - xidWrapLimit - xid), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions."))); - else - ereport(WARNING, - (errmsg("database with OID %u must be vacuumed within %u transactions", - oldest_datoid, - xidWrapLimit - xid), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions."))); - } - /* Re-acquire lock and start over */ LWLockAcquire(XidGenLock, LW_EXCLUSIVE); xid = ShmemVariableCache->nextXid; --- 107,112 ---- *************** void *** 288,338 **** SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) { TransactionId xidVacLimit; - TransactionId xidWarnLimit; - TransactionId xidStopLimit; - TransactionId xidWrapLimit; TransactionId curXid; Assert(TransactionIdIsNormal(oldest_datfrozenxid)); /* - * The place where we actually get into deep trouble is halfway around - * from the oldest potentially-existing XID. (This calculation is - * probably off by one or two counts, because the special XIDs reduce the - * size of the loop a little bit. But we throw in plenty of slop below, - * so it doesn't matter.) - */ - xidWrapLimit = oldest_datfrozenxid + (MaxTransactionId >> 1); - if (xidWrapLimit < FirstNormalTransactionId) - xidWrapLimit += FirstNormalTransactionId; - - /* - * We'll refuse to continue assigning XIDs in interactive mode once we get - * within 1M transactions of data loss. This leaves lots of room for the - * DBA to fool around fixing things in a standalone backend, while not - * being significant compared to total XID space. 
(Note that since - * vacuuming requires one transaction per table cleaned, we had better be - * sure there's lots of XIDs left...) - */ - xidStopLimit = xidWrapLimit - 1000000; - if (xidStopLimit < FirstNormalTransactionId) - xidStopLimit -= FirstNormalTransactionId; - - /* - * We'll start complaining loudly when we get within 10M transactions of - * the stop point. This is kind of arbitrary, but if you let your gas - * gauge get down to 1% of full, would you be looking for the next gas - * station? We need to be fairly liberal about this number because there - * are lots of scenarios where most transactions are done by automatic - * clients that won't pay attention to warnings. (No, we're not gonna make - * this configurable. If you know enough to configure it, you know enough - * to not get in this kind of trouble in the first place.) - */ - xidWarnLimit = xidStopLimit - 10000000; - if (xidWarnLimit < FirstNormalTransactionId) - xidWarnLimit -= FirstNormalTransactionId; - - /* * We'll start trying to force autovacuums when oldest_datfrozenxid gets * to be more than autovacuum_freeze_max_age transactions old. * --- 241,251 ---- *************** SetTransactionIdLimit(TransactionId olde *** 355,372 **** LWLockAcquire(XidGenLock, LW_EXCLUSIVE); ShmemVariableCache->oldestXid = oldest_datfrozenxid; ShmemVariableCache->xidVacLimit = xidVacLimit; - ShmemVariableCache->xidWarnLimit = xidWarnLimit; - ShmemVariableCache->xidStopLimit = xidStopLimit; - ShmemVariableCache->xidWrapLimit = xidWrapLimit; ShmemVariableCache->oldestXidDB = oldest_datoid; curXid = ShmemVariableCache->nextXid; LWLockRelease(XidGenLock); - /* Log the info */ - ereport(DEBUG1, - (errmsg("transaction ID wrap limit is %u, limited by database with OID %u", - xidWrapLimit, oldest_datoid))); - /* * If past the autovacuum force point, immediately signal an autovac * request. 
The reason for this is that autovac only processes one --- 268,277 ---- *************** SetTransactionIdLimit(TransactionId olde *** 377,417 **** if (TransactionIdFollowsOrEquals(curXid, xidVacLimit) && IsUnderPostmaster && !InRecovery) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - /* Give an immediate warning if past the wrap warn point */ - if (TransactionIdFollowsOrEquals(curXid, xidWarnLimit) && !InRecovery) - { - char *oldest_datname; - - /* - * We can be called when not inside a transaction, for example during - * StartupXLOG(). In such a case we cannot do database access, so we - * must just report the oldest DB's OID. - * - * Note: it's also possible that get_database_name fails and returns - * NULL, for example because the database just got dropped. We'll - * still warn, even though the warning might now be unnecessary. - */ - if (IsTransactionState()) - oldest_datname = get_database_name(oldest_datoid); - else - oldest_datname = NULL; - - if (oldest_datname) - ereport(WARNING, - (errmsg("database \"%s\" must be vacuumed within %u transactions", - oldest_datname, - xidWrapLimit - curXid), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions."))); - else - ereport(WARNING, - (errmsg("database with OID %u must be vacuumed within %u transactions", - oldest_datoid, - xidWrapLimit - curXid), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions."))); - } } --- 282,287 ---- diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c new file mode 100644 index 93dca7a..bd9ba40 *** a/src/backend/access/transam/xact.c --- b/src/backend/access/transam/xact.c *************** RecordTransactionAbort(bool isSubXact) *** 1564,1570 **** * Check that we haven't aborted halfway through RecordTransactionCommit. 
*/ if (TransactionIdDidCommit(xid)) ! elog(PANIC, "cannot abort transaction %u, it was already committed", xid); /* Fetch the data we need for the abort record */ --- 1564,1570 ---- * Check that we haven't aborted halfway through RecordTransactionCommit. */ if (TransactionIdDidCommit(xid)) ! elog(PANIC, "cannot abort transaction " XID_FMT ", it was already committed", xid); /* Fetch the data we need for the abort record */ *************** ShowTransactionStateRec(const char *str, *** 5123,5131 **** { int i; ! appendStringInfo(&buf, ", children: %u", s->childXids[0]); for (i = 1; i < s->nChildXids; i++) ! appendStringInfo(&buf, " %u", s->childXids[i]); } if (s->parent) --- 5123,5131 ---- { int i; ! appendStringInfo(&buf, ", children: " XID_FMT, s->childXids[0]); for (i = 1; i < s->nChildXids; i++) ! appendStringInfo(&buf, " " XID_FMT, s->childXids[i]); } if (s->parent) *************** ShowTransactionStateRec(const char *str, *** 5133,5145 **** /* use ereport to suppress computation if msg will not be printed */ ereport(DEBUG5, ! (errmsg_internal("%s(%d) name: %s; blockState: %s; state: %s, xid/subid/cid: %u/%u/%u%s%s", str, s->nestingLevel, PointerIsValid(s->name) ? s->name : "unnamed", BlockStateAsString(s->blockState), TransStateAsString(s->state), ! (unsigned int) s->transactionId, ! (unsigned int) s->subTransactionId, (unsigned int) currentCommandId, currentCommandIdUsed ? " (used)" : "", buf.data))); --- 5133,5145 ---- /* use ereport to suppress computation if msg will not be printed */ ereport(DEBUG5, ! (errmsg_internal("%s(%d) name: %s; blockState: %s; state: %s, xid/subid/cid: " XID_FMT "/" XID_FMT "/%u%s%s", str, s->nestingLevel, PointerIsValid(s->name) ? s->name : "unnamed", BlockStateAsString(s->blockState), TransStateAsString(s->state), ! s->transactionId, ! s->subTransactionId, (unsigned int) currentCommandId, currentCommandIdUsed ? 
" (used)" : "", buf.data))); *************** XactLogCommitRecord(TimestampTz commit_t *** 5270,5278 **** xl_xact_xinfo xl_xinfo; xl_xact_dbinfo xl_dbinfo; xl_xact_subxacts xl_subxacts; xl_xact_relfilenodes xl_relfilenodes; xl_xact_invals xl_invals; - xl_xact_twophase xl_twophase; xl_xact_origin xl_origin; uint8 info; --- 5270,5278 ---- xl_xact_xinfo xl_xinfo; xl_xact_dbinfo xl_dbinfo; xl_xact_subxacts xl_subxacts; + xl_xact_twophase xl_twophase; xl_xact_relfilenodes xl_relfilenodes; xl_xact_invals xl_invals; xl_xact_origin xl_origin; uint8 info; *************** XactLogCommitRecord(TimestampTz commit_t *** 5322,5327 **** --- 5322,5333 ---- xl_subxacts.nsubxacts = nsubxacts; } + if (TransactionIdIsValid(twophase_xid)) + { + xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; + xl_twophase.xid = twophase_xid; + } + if (nrels > 0) { xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILENODES; *************** XactLogCommitRecord(TimestampTz commit_t *** 5334,5345 **** xl_invals.nmsgs = nmsgs; } - if (TransactionIdIsValid(twophase_xid)) - { - xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; - xl_twophase.xid = twophase_xid; - } - /* dump transaction origin information */ if (replorigin_session_origin != InvalidRepOriginId) { --- 5340,5345 ---- *************** XactLogCommitRecord(TimestampTz commit_t *** 5372,5377 **** --- 5372,5380 ---- nsubxacts * sizeof(TransactionId)); } + if (xl_xinfo.xinfo & XACT_XINFO_HAS_TWOPHASE) + XLogRegisterData((char *) (&xl_twophase), sizeof(xl_xact_twophase)); + if (xl_xinfo.xinfo & XACT_XINFO_HAS_RELFILENODES) { XLogRegisterData((char *) (&xl_relfilenodes), *************** XactLogCommitRecord(TimestampTz commit_t *** 5387,5395 **** nmsgs * sizeof(SharedInvalidationMessage)); } - if (xl_xinfo.xinfo & XACT_XINFO_HAS_TWOPHASE) - XLogRegisterData((char *) (&xl_twophase), sizeof(xl_xact_twophase)); - if (xl_xinfo.xinfo & XACT_XINFO_HAS_ORIGIN) XLogRegisterData((char *) (&xl_origin), sizeof(xl_xact_origin)); --- 5390,5395 ---- *************** XactLogAbortRecord(TimestampTz 
abort_tim *** 5414,5421 **** xl_xact_abort xlrec; xl_xact_xinfo xl_xinfo; xl_xact_subxacts xl_subxacts; - xl_xact_relfilenodes xl_relfilenodes; xl_xact_twophase xl_twophase; uint8 info; --- 5414,5421 ---- xl_xact_abort xlrec; xl_xact_xinfo xl_xinfo; xl_xact_subxacts xl_subxacts; xl_xact_twophase xl_twophase; + xl_xact_relfilenodes xl_relfilenodes; uint8 info; *************** XactLogAbortRecord(TimestampTz abort_tim *** 5443,5460 **** xl_subxacts.nsubxacts = nsubxacts; } - if (nrels > 0) - { - xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILENODES; - xl_relfilenodes.nrels = nrels; - } - if (TransactionIdIsValid(twophase_xid)) { xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; xl_twophase.xid = twophase_xid; } if (xl_xinfo.xinfo != 0) info |= XLOG_XACT_HAS_INFO; --- 5443,5460 ---- xl_subxacts.nsubxacts = nsubxacts; } if (TransactionIdIsValid(twophase_xid)) { xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; xl_twophase.xid = twophase_xid; } + if (nrels > 0) + { + xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILENODES; + xl_relfilenodes.nrels = nrels; + } + if (xl_xinfo.xinfo != 0) info |= XLOG_XACT_HAS_INFO; *************** XactLogAbortRecord(TimestampTz abort_tim *** 5475,5480 **** --- 5475,5483 ---- nsubxacts * sizeof(TransactionId)); } + if (xl_xinfo.xinfo & XACT_XINFO_HAS_TWOPHASE) + XLogRegisterData((char *) (&xl_twophase), sizeof(xl_xact_twophase)); + if (xl_xinfo.xinfo & XACT_XINFO_HAS_RELFILENODES) { XLogRegisterData((char *) (&xl_relfilenodes), *************** XactLogAbortRecord(TimestampTz abort_tim *** 5483,5491 **** nrels * sizeof(RelFileNode)); } - if (xl_xinfo.xinfo & XACT_XINFO_HAS_TWOPHASE) - XLogRegisterData((char *) (&xl_twophase), sizeof(xl_xact_twophase)); - return XLogInsert(RM_XACT_ID, info); } --- 5486,5491 ---- diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c new file mode 100644 index a3e8ce0..c4e494a *** a/src/backend/access/transam/xlog.c --- b/src/backend/access/transam/xlog.c *************** typedef struct XLogCtlData *** 577,583 **** 
/* Protected by info_lck: */ XLogwrtRqst LogwrtRqst; XLogRecPtr RedoRecPtr; /* a recent copy of Insert->RedoRecPtr */ - uint32 ckptXidEpoch; /* nextXID & epoch of latest checkpoint */ TransactionId ckptXid; XLogRecPtr asyncXactLSN; /* LSN of newest async commit/abort */ XLogRecPtr replicationSlotMinLSN; /* oldest LSN needed by any slot */ --- 577,582 ---- *************** BootStrapXLOG(void) *** 5009,5022 **** checkPoint.ThisTimeLineID = ThisTimeLineID; checkPoint.PrevTimeLineID = ThisTimeLineID; checkPoint.fullPageWrites = fullPageWrites; ! checkPoint.nextXidEpoch = 0; ! checkPoint.nextXid = FirstNormalTransactionId; checkPoint.nextOid = FirstBootstrapObjectId; ! checkPoint.nextMulti = FirstMultiXactId; checkPoint.nextMultiOffset = 0; ! checkPoint.oldestXid = FirstNormalTransactionId; checkPoint.oldestXidDB = TemplateDbOid; ! checkPoint.oldestMulti = FirstMultiXactId; checkPoint.oldestMultiDB = TemplateDbOid; checkPoint.oldestCommitTsXid = InvalidTransactionId; checkPoint.newestCommitTsXid = InvalidTransactionId; --- 5008,5020 ---- checkPoint.ThisTimeLineID = ThisTimeLineID; checkPoint.PrevTimeLineID = ThisTimeLineID; checkPoint.fullPageWrites = fullPageWrites; ! checkPoint.nextXid = FirstNormalTransactionId + 1; checkPoint.nextOid = FirstBootstrapObjectId; ! checkPoint.nextMulti = FirstMultiXactId + 1; checkPoint.nextMultiOffset = 0; ! checkPoint.oldestXid = checkPoint.nextXid - 1; checkPoint.oldestXidDB = TemplateDbOid; ! checkPoint.oldestMulti = checkPoint.nextMulti - 1; checkPoint.oldestMultiDB = TemplateDbOid; checkPoint.oldestCommitTsXid = InvalidTransactionId; checkPoint.newestCommitTsXid = InvalidTransactionId; *************** readRecoveryCommandFile(void) *** 5250,5263 **** else if (strcmp(item->name, "recovery_target_xid") == 0) { errno = 0; ! 
recoveryTargetXid = (TransactionId) strtoul(item->value, NULL, 0); if (errno == EINVAL || errno == ERANGE) ereport(FATAL, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("recovery_target_xid is not a valid number: \"%s\"", item->value))); ereport(DEBUG2, ! (errmsg_internal("recovery_target_xid = %u", recoveryTargetXid))); recoveryTarget = RECOVERY_TARGET_XID; } --- 5248,5261 ---- else if (strcmp(item->name, "recovery_target_xid") == 0) { errno = 0; ! recoveryTargetXid = (TransactionId) pg_strtouint64(item->value, NULL, 0); if (errno == EINVAL || errno == ERANGE) ereport(FATAL, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("recovery_target_xid is not a valid number: \"%s\"", item->value))); ereport(DEBUG2, ! (errmsg_internal("recovery_target_xid = " XID_FMT, recoveryTargetXid))); recoveryTarget = RECOVERY_TARGET_XID; } *************** recoveryStopsBefore(XLogReaderState *rec *** 5746,5759 **** if (isCommit) { ereport(LOG, ! (errmsg("recovery stopping before commit of transaction %u, time %s", recoveryStopXid, timestamptz_to_str(recoveryStopTime)))); } else { ereport(LOG, ! (errmsg("recovery stopping before abort of transaction %u, time %s", recoveryStopXid, timestamptz_to_str(recoveryStopTime)))); } --- 5744,5757 ---- if (isCommit) { ereport(LOG, ! (errmsg("recovery stopping before commit of transaction " XID_FMT ", time %s", recoveryStopXid, timestamptz_to_str(recoveryStopTime)))); } else { ereport(LOG, ! (errmsg("recovery stopping before abort of transaction " XID_FMT ", time %s", recoveryStopXid, timestamptz_to_str(recoveryStopTime)))); } *************** recoveryStopsAfter(XLogReaderState *reco *** 5885,5891 **** xact_info == XLOG_XACT_COMMIT_PREPARED) { ereport(LOG, ! (errmsg("recovery stopping after commit of transaction %u, time %s", recoveryStopXid, timestamptz_to_str(recoveryStopTime)))); } --- 5883,5889 ---- xact_info == XLOG_XACT_COMMIT_PREPARED) { ereport(LOG, ! 
(errmsg("recovery stopping after commit of transaction " XID_FMT ", time %s", recoveryStopXid, timestamptz_to_str(recoveryStopTime)))); } *************** recoveryStopsAfter(XLogReaderState *reco *** 5893,5899 **** xact_info == XLOG_XACT_ABORT_PREPARED) { ereport(LOG, ! (errmsg("recovery stopping after abort of transaction %u, time %s", recoveryStopXid, timestamptz_to_str(recoveryStopTime)))); } --- 5891,5897 ---- xact_info == XLOG_XACT_ABORT_PREPARED) { ereport(LOG, ! (errmsg("recovery stopping after abort of transaction " XID_FMT ", time %s", recoveryStopXid, timestamptz_to_str(recoveryStopTime)))); } *************** StartupXLOG(void) *** 6328,6334 **** (errmsg("entering standby mode"))); else if (recoveryTarget == RECOVERY_TARGET_XID) ereport(LOG, ! (errmsg("starting point-in-time recovery to XID %u", recoveryTargetXid))); else if (recoveryTarget == RECOVERY_TARGET_TIME) ereport(LOG, --- 6326,6332 ---- (errmsg("entering standby mode"))); else if (recoveryTarget == RECOVERY_TARGET_XID) ereport(LOG, ! (errmsg("starting point-in-time recovery to XID " XID_FMT, recoveryTargetXid))); else if (recoveryTarget == RECOVERY_TARGET_TIME) ereport(LOG, *************** StartupXLOG(void) *** 6621,6642 **** (uint32) (checkPoint.redo >> 32), (uint32) checkPoint.redo, wasShutdown ? "TRUE" : "FALSE"))); ereport(DEBUG1, ! (errmsg_internal("next transaction ID: %u:%u; next OID: %u", ! checkPoint.nextXidEpoch, checkPoint.nextXid, ! checkPoint.nextOid))); ereport(DEBUG1, ! (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %u", checkPoint.nextMulti, checkPoint.nextMultiOffset))); ereport(DEBUG1, ! (errmsg_internal("oldest unfrozen transaction ID: %u, in database %u", checkPoint.oldestXid, checkPoint.oldestXidDB))); ereport(DEBUG1, ! (errmsg_internal("oldest MultiXactId: %u, in database %u", checkPoint.oldestMulti, checkPoint.oldestMultiDB))); ereport(DEBUG1, ! (errmsg_internal("commit timestamp Xid oldest/newest: %u/%u", ! checkPoint.oldestCommitTsXid, ! 
checkPoint.newestCommitTsXid))); if (!TransactionIdIsNormal(checkPoint.nextXid)) ereport(PANIC, (errmsg("invalid next transaction ID"))); --- 6619,6640 ---- (uint32) (checkPoint.redo >> 32), (uint32) checkPoint.redo, wasShutdown ? "TRUE" : "FALSE"))); ereport(DEBUG1, ! (errmsg_internal("next transaction ID: " XID_FMT "; next OID: %u", ! checkPoint.nextXid, ! checkPoint.nextOid))); ereport(DEBUG1, ! (errmsg_internal("next MultiXactId: " XID_FMT "; next MultiXactOffset: " INT64_FORMAT, checkPoint.nextMulti, checkPoint.nextMultiOffset))); ereport(DEBUG1, ! (errmsg_internal("oldest unfrozen transaction ID: " XID_FMT ", in database %u", checkPoint.oldestXid, checkPoint.oldestXidDB))); ereport(DEBUG1, ! (errmsg_internal("oldest MultiXactId: " XID_FMT ", in database %u", checkPoint.oldestMulti, checkPoint.oldestMultiDB))); ereport(DEBUG1, ! (errmsg_internal("commit timestamp Xid oldest/newest: " XID_FMT "/" XID_FMT, ! checkPoint.oldestCommitTsXid, ! checkPoint.newestCommitTsXid))); if (!TransactionIdIsNormal(checkPoint.nextXid)) ereport(PANIC, (errmsg("invalid next transaction ID"))); *************** StartupXLOG(void) *** 6651,6657 **** SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true); SetCommitTsLimit(checkPoint.oldestCommitTsXid, checkPoint.newestCommitTsXid); - XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch; XLogCtl->ckptXid = checkPoint.nextXid; /* --- 6649,6654 ---- *************** StartupXLOG(void) *** 7431,7437 **** */ if (recoveryTarget == RECOVERY_TARGET_XID) snprintf(reason, sizeof(reason), ! "%s transaction %u", recoveryStopAfter ? "after" : "before", recoveryStopXid); else if (recoveryTarget == RECOVERY_TARGET_TIME) --- 7428,7434 ---- */ if (recoveryTarget == RECOVERY_TARGET_XID) snprintf(reason, sizeof(reason), ! "%s transaction " XID_FMT, recoveryStopAfter ? 
"after" : "before", recoveryStopXid); else if (recoveryTarget == RECOVERY_TARGET_TIME) *************** GetLastSegSwitchData(XLogRecPtr *lastSwi *** 8297,8337 **** } /* - * GetNextXidAndEpoch - get the current nextXid value and associated epoch - * - * This is exported for use by code that would like to have 64-bit XIDs. - * We don't really support such things, but all XIDs within the system - * can be presumed "close to" the result, and thus the epoch associated - * with them can be determined. - */ - void - GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch) - { - uint32 ckptXidEpoch; - TransactionId ckptXid; - TransactionId nextXid; - - /* Must read checkpoint info first, else have race condition */ - SpinLockAcquire(&XLogCtl->info_lck); - ckptXidEpoch = XLogCtl->ckptXidEpoch; - ckptXid = XLogCtl->ckptXid; - SpinLockRelease(&XLogCtl->info_lck); - - /* Now fetch current nextXid */ - nextXid = ReadNewTransactionId(); - - /* - * nextXid is certainly logically later than ckptXid. So if it's - * numerically less, it must have wrapped into the next epoch. - */ - if (nextXid < ckptXid) - ckptXidEpoch++; - - *xid = nextXid; - *epoch = ckptXidEpoch; - } - - /* * This must be called ONCE during postmaster or standalone-backend shutdown */ void --- 8294,8299 ---- *************** CreateCheckPoint(int flags) *** 8750,8760 **** checkPoint.newestCommitTsXid = ShmemVariableCache->newestCommitTsXid; LWLockRelease(CommitTsLock); - /* Increase XID epoch if we've wrapped around since last checkpoint */ - checkPoint.nextXidEpoch = ControlFile->checkPointCopy.nextXidEpoch; - if (checkPoint.nextXid < ControlFile->checkPointCopy.nextXid) - checkPoint.nextXidEpoch++; - LWLockAcquire(OidGenLock, LW_SHARED); checkPoint.nextOid = ShmemVariableCache->nextOid; if (!shutdown) --- 8712,8717 ---- *************** CreateCheckPoint(int flags) *** 8897,8905 **** UpdateControlFile(); LWLockRelease(ControlFileLock); ! 
/* Update shared-memory copy of checkpoint XID/epoch */ SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch; XLogCtl->ckptXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); --- 8854,8861 ---- UpdateControlFile(); LWLockRelease(ControlFileLock); ! /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); *************** xlog_redo(XLogReaderState *record) *** 9724,9735 **** } /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ - ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch; ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; ! /* Update shared-memory copy of checkpoint XID/epoch */ SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch; XLogCtl->ckptXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); --- 9680,9689 ---- } /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; ! /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); *************** xlog_redo(XLogReaderState *record) *** 9774,9785 **** SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ - ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch; ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; ! 
/* Update shared-memory copy of checkpoint XID/epoch */ SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch; XLogCtl->ckptXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); --- 9728,9737 ---- SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; ! /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c new file mode 100644 index 0453fd4..509d620 *** a/src/backend/bootstrap/bootstrap.c --- b/src/backend/bootstrap/bootstrap.c *************** static const struct typinfo TypInfo[] = *** 127,133 **** F_OIDIN, F_OIDOUT}, {"tid", TIDOID, 0, 6, false, 's', 'p', InvalidOid, F_TIDIN, F_TIDOUT}, ! {"xid", XIDOID, 0, 4, true, 'i', 'p', InvalidOid, F_XIDIN, F_XIDOUT}, {"cid", CIDOID, 0, 4, true, 'i', 'p', InvalidOid, F_CIDIN, F_CIDOUT}, --- 127,133 ---- F_OIDIN, F_OIDOUT}, {"tid", TIDOID, 0, 6, false, 's', 'p', InvalidOid, F_TIDIN, F_TIDOUT}, ! {"xid", XIDOID, 0, 8, FLOAT8PASSBYVAL, 'd', 'p', InvalidOid, F_XIDIN, F_XIDOUT}, {"cid", CIDOID, 0, 4, true, 'i', 'p', InvalidOid, F_CIDIN, F_CIDOUT}, diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c new file mode 100644 index 05e7081..fe98e88 *** a/src/backend/catalog/heap.c --- b/src/backend/catalog/heap.c *************** static FormData_pg_attribute a2 = { *** 156,162 **** static FormData_pg_attribute a3 = { 0, {"xmin"}, XIDOID, 0, sizeof(TransactionId), MinTransactionIdAttributeNumber, 0, -1, -1, ! true, 'p', 'i', true, false, '\0', false, true, 0 }; static FormData_pg_attribute a4 = { --- 156,162 ---- static FormData_pg_attribute a3 = { 0, {"xmin"}, XIDOID, 0, sizeof(TransactionId), MinTransactionIdAttributeNumber, 0, -1, -1, ! 
FLOAT8PASSBYVAL, 'p', 'd', true, false, '\0', false, true, 0 }; static FormData_pg_attribute a4 = { *************** static FormData_pg_attribute a4 = { *** 168,174 **** static FormData_pg_attribute a5 = { 0, {"xmax"}, XIDOID, 0, sizeof(TransactionId), MaxTransactionIdAttributeNumber, 0, -1, -1, ! true, 'p', 'i', true, false, '\0', false, true, 0 }; static FormData_pg_attribute a6 = { --- 168,174 ---- static FormData_pg_attribute a5 = { 0, {"xmax"}, XIDOID, 0, sizeof(TransactionId), MaxTransactionIdAttributeNumber, 0, -1, -1, ! FLOAT8PASSBYVAL, 'p', 'd', true, false, '\0', false, true, 0 }; static FormData_pg_attribute a6 = { diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c new file mode 100644 index c7b2f03..b960da7 *** a/src/backend/catalog/index.c --- b/src/backend/catalog/index.c *************** IndexBuildHeapRangeScan(Relation heapRel *** 2415,2421 **** * before commit there. Give a warning if neither case * applies. */ ! xwait = HeapTupleHeaderGetXmin(heapTuple->t_data); if (!TransactionIdIsCurrentTransactionId(xwait)) { if (!is_system_catalog) --- 2415,2421 ---- * before commit there. Give a warning if neither case * applies. */ ! xwait = HeapTupleGetXmin(heapTuple); if (!TransactionIdIsCurrentTransactionId(xwait)) { if (!is_system_catalog) *************** IndexBuildHeapRangeScan(Relation heapRel *** 2463,2469 **** break; } ! xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data); if (!TransactionIdIsCurrentTransactionId(xwait)) { if (!is_system_catalog) --- 2463,2469 ---- break; } ! 
xwait = HeapTupleGetUpdateXidAny(heapTuple); if (!TransactionIdIsCurrentTransactionId(xwait)) { if (!is_system_catalog) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c new file mode 100644 index 08fc18e..d12f2c1 *** a/src/backend/commands/analyze.c --- b/src/backend/commands/analyze.c *************** acquire_sample_rows(Relation onerel, int *** 1068,1073 **** --- 1068,1074 ---- targtuple.t_tableOid = RelationGetRelid(onerel); targtuple.t_data = (HeapTupleHeader) PageGetItem(targpage, itemid); targtuple.t_len = ItemIdGetLength(itemid); + HeapTupleCopyBaseFromPage(&targtuple, targpage); switch (HeapTupleSatisfiesVacuum(&targtuple, OldestXmin, *************** acquire_sample_rows(Relation onerel, int *** 1105,1111 **** * has to adjust the numbers we send to the stats * collector to make this come out right.) */ ! if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple.t_data))) { sample_it = true; liverows += 1; --- 1106,1112 ---- * has to adjust the numbers we send to the stats * collector to make this come out right.) */ ! if (TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(&targtuple))) { sample_it = true; liverows += 1; *************** acquire_sample_rows(Relation onerel, int *** 1125,1131 **** * right. (Note: this works out properly when the row was * both inserted and deleted in our xact.) */ ! if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple.t_data))) deadrows += 1; else liverows += 1; --- 1126,1132 ---- * right. (Note: this works out properly when the row was * both inserted and deleted in our xact.) */ ! 
if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(&targtuple))) deadrows += 1; else liverows += 1; diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c new file mode 100644 index bacc08e..15e45f7 *** a/src/backend/commands/async.c --- b/src/backend/commands/async.c *************** typedef struct QueuePosition *** 199,211 **** /* choose logically smaller QueuePosition */ #define QUEUE_POS_MIN(x,y) \ ! (asyncQueuePagePrecedes((x).page, (y).page) ? (x) : \ (x).page != (y).page ? (y) : \ (x).offset < (y).offset ? (x) : (y)) /* choose logically larger QueuePosition */ #define QUEUE_POS_MAX(x,y) \ ! (asyncQueuePagePrecedes((x).page, (y).page) ? (y) : \ (x).page != (y).page ? (x) : \ (x).offset > (y).offset ? (x) : (y)) --- 199,211 ---- /* choose logically smaller QueuePosition */ #define QUEUE_POS_MIN(x,y) \ ! (((x).page < (y).page) ? (x) : \ (x).page != (y).page ? (y) : \ (x).offset < (y).offset ? (x) : (y)) /* choose logically larger QueuePosition */ #define QUEUE_POS_MAX(x,y) \ ! (((x).page < (y).page) ? (y) : \ (x).page != (y).page ? (x) : \ (x).offset > (y).offset ? (x) : (y)) *************** static bool backendHasSentNotifications *** 368,374 **** bool Trace_notify = false; /* local function prototypes */ - static bool asyncQueuePagePrecedes(int p, int q); static void queue_listen(ListenActionKind action, const char *channel); static void Async_UnlistenOnExit(int code, Datum arg); static void Exec_ListenPreCommit(void); --- 368,373 ---- *************** static bool AsyncExistsPendingNotify(con *** 394,422 **** static void ClearPendingActionsAndNotifies(void); /* - * We will work on the page range of 0..QUEUE_MAX_PAGE. - */ - static bool - asyncQueuePagePrecedes(int p, int q) - { - int diff; - - /* - * We have to compare modulo (QUEUE_MAX_PAGE+1)/2. Both inputs should be - * in the range 0..QUEUE_MAX_PAGE. 
- */ - Assert(p >= 0 && p <= QUEUE_MAX_PAGE); - Assert(q >= 0 && q <= QUEUE_MAX_PAGE); - - diff = p - q; - if (diff >= ((QUEUE_MAX_PAGE + 1) / 2)) - diff -= QUEUE_MAX_PAGE + 1; - else if (diff < -((QUEUE_MAX_PAGE + 1) / 2)) - diff += QUEUE_MAX_PAGE + 1; - return diff < 0; - } - - /* * Report space needed for our shared memory area */ Size --- 393,398 ---- *************** AsyncShmemInit(void) *** 475,481 **** /* * Set up SLRU management of the pg_notify data. */ - AsyncCtl->PagePrecedes = asyncQueuePagePrecedes; SimpleLruInit(AsyncCtl, "async", NUM_ASYNC_BUFFERS, 0, AsyncCtlLock, "pg_notify", LWTRANCHE_ASYNC_BUFFERS); /* Override default assumption that writes should be fsync'd */ --- 451,456 ---- *************** asyncQueueIsFull(void) *** 1234,1240 **** nexthead = 0; /* wrap around */ boundary = QUEUE_POS_PAGE(QUEUE_TAIL); boundary -= boundary % SLRU_PAGES_PER_SEGMENT; ! return asyncQueuePagePrecedes(nexthead, boundary); } /* --- 1209,1215 ---- nexthead = 0; /* wrap around */ boundary = QUEUE_POS_PAGE(QUEUE_TAIL); boundary -= boundary % SLRU_PAGES_PER_SEGMENT; ! return (nexthead < boundary); } /* *************** asyncQueueAdvanceTail(void) *** 2012,2018 **** */ newtailpage = QUEUE_POS_PAGE(min); boundary = newtailpage - (newtailpage % SLRU_PAGES_PER_SEGMENT); ! if (asyncQueuePagePrecedes(oldtailpage, boundary)) { /* * SimpleLruTruncate() will ask for AsyncCtlLock but will also release --- 1987,1993 ---- */ newtailpage = QUEUE_POS_PAGE(min); boundary = newtailpage - (newtailpage % SLRU_PAGES_PER_SEGMENT); ! if (oldtailpage < boundary) { /* * SimpleLruTruncate() will ask for AsyncCtlLock but will also release diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c new file mode 100644 index 48f1e6e..bd01514 *** a/src/backend/commands/cluster.c --- b/src/backend/commands/cluster.c *************** copy_heap_data(Oid OIDNewHeap, Oid OIDOl *** 991,997 **** * case we had better copy it. */ if (!is_system_catalog && ! 
!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data))) elog(WARNING, "concurrent insert in progress within table \"%s\"", RelationGetRelationName(OldHeap)); /* treat as live */ --- 991,997 ---- * case we had better copy it. */ if (!is_system_catalog && ! !TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(tuple))) elog(WARNING, "concurrent insert in progress within table \"%s\"", RelationGetRelationName(OldHeap)); /* treat as live */ *************** copy_heap_data(Oid OIDNewHeap, Oid OIDOl *** 1003,1009 **** * Similar situation to INSERT_IN_PROGRESS case. */ if (!is_system_catalog && ! !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data))) elog(WARNING, "concurrent delete in progress within table \"%s\"", RelationGetRelationName(OldHeap)); /* treat as recently dead */ --- 1003,1009 ---- * Similar situation to INSERT_IN_PROGRESS case. */ if (!is_system_catalog && ! !TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(tuple))) elog(WARNING, "concurrent delete in progress within table \"%s\"", RelationGetRelationName(OldHeap)); /* treat as recently dead */ diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c new file mode 100644 index cfa3f05..a26c33b *** a/src/backend/commands/copy.c --- b/src/backend/commands/copy.c *************** CopyFrom(CopyState cstate) *** 2571,2576 **** --- 2571,2577 ---- * t_tableOid before evaluating them. */ tuple->t_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc); + HeapTupleSetZeroBase(tuple); /* Triggers and stuff need to be invoked in query context. 
*/ MemoryContextSwitchTo(oldcontext); diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c new file mode 100644 index 7c47bef..f38f811 *** a/src/backend/commands/sequence.c --- b/src/backend/commands/sequence.c *************** fill_seq_with_data(Relation rel, HeapTup *** 338,343 **** --- 338,344 ---- page = BufferGetPage(buf); PageInit(page, BufferGetPageSize(buf), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; HeapPageGetSpecial(page)->pd_magic = SEQ_PAGE_MAGIC; /* Now insert sequence tuple */ *************** fill_seq_with_data(Relation rel, HeapTup *** 351,360 **** * because if the current transaction aborts, no other xact will ever * examine the sequence tuple anyway. */ ! HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId); HeapTupleHeaderSetXminFrozen(tuple->t_data); HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId); ! HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); tuple->t_data->t_infomask |= HEAP_XMAX_INVALID; ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber); --- 352,361 ---- * because if the current transaction aborts, no other xact will ever * examine the sequence tuple anyway. */ ! HeapTupleSetXmin(tuple, FrozenTransactionId); HeapTupleHeaderSetXminFrozen(tuple->t_data); HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId); ! 
HeapTupleSetXmax(tuple, InvalidTransactionId); tuple->t_data->t_infomask |= HEAP_XMAX_INVALID; ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber); *************** read_seq_tuple(Relation rel, Buffer *buf *** 1176,1181 **** --- 1177,1183 ---- /* Note we currently only bother to set these two fields of *seqdatatuple */ seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); seqdatatuple->t_len = ItemIdGetLength(lp); + HeapTupleCopyBaseFromPage(seqdatatuple, page); /* * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on *************** read_seq_tuple(Relation rel, Buffer *buf *** 1186,1194 **** * this again if the update gets lost. */ Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); ! if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId) { ! HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId); seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED; seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID; MarkBufferDirtyHint(*buf, true); --- 1188,1196 ---- * this again if the update gets lost. */ Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); ! if (HeapTupleGetRawXmax(seqdatatuple) != InvalidTransactionId) { ! 
HeapTupleSetXmax(seqdatatuple, InvalidTransactionId); seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED; seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID; MarkBufferDirtyHint(*buf, true); *************** seq_redo(XLogReaderState *record) *** 1888,1893 **** --- 1890,1896 ---- localpage = (Page) palloc(BufferGetPageSize(buffer)); PageInit(localpage, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(localpage)->pd_xid_base = RecentXmin - FirstNormalTransactionId; HeapPageGetSpecial(page)->pd_magic = SEQ_PAGE_MAGIC; item = (char *) xlrec + sizeof(xl_seq_rec); diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c new file mode 100644 index 269c9e1..bba9c0a *** a/src/backend/commands/trigger.c --- b/src/backend/commands/trigger.c *************** ltrmark:; *** 3183,3188 **** --- 3183,3189 ---- tuple.t_len = ItemIdGetLength(lp); tuple.t_self = *tid; tuple.t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(&tuple, page); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c new file mode 100644 index faa1812..185b7ee *** a/src/backend/commands/vacuum.c --- b/src/backend/commands/vacuum.c *************** *** 55,64 **** /* * GUC parameters */ ! int vacuum_freeze_min_age; ! int vacuum_freeze_table_age; ! int vacuum_multixact_freeze_min_age; ! int vacuum_multixact_freeze_table_age; /* A few variables that don't seem worth passing around as parameters */ --- 55,64 ---- /* * GUC parameters */ ! int64 vacuum_freeze_min_age; ! int64 vacuum_freeze_table_age; ! int64 vacuum_multixact_freeze_min_age; ! int64 vacuum_multixact_freeze_table_age; /* A few variables that don't seem worth passing around as parameters */ *************** get_rel_oids(Oid relid, const RangeVar * *** 499,521 **** */ void vacuum_set_xid_limits(Relation rel, ! int freeze_min_age, ! int freeze_table_age, ! int multixact_freeze_min_age, ! 
int multixact_freeze_table_age, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit) { ! int freezemin; ! int mxid_freezemin; ! int effective_multixact_freeze_max_age; TransactionId limit; TransactionId safeLimit; MultiXactId mxactLimit; MultiXactId safeMxactLimit; /* * We can always ignore processes running lazy vacuum. This is because we --- 499,524 ---- */ void vacuum_set_xid_limits(Relation rel, ! int64 freeze_min_age, ! int64 freeze_table_age, ! int64 multixact_freeze_min_age, ! int64 multixact_freeze_table_age, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, MultiXactId *multiXactCutoff, MultiXactId *mxactFullScanLimit) { ! int64 freezemin; ! int64 mxid_freezemin; ! int64 effective_multixact_freeze_max_age; TransactionId limit; TransactionId safeLimit; + TransactionId nextXid; + MultiXactId oldestMxact; MultiXactId mxactLimit; MultiXactId safeMxactLimit; + MultiXactId nextMxactId; /* * We can always ignore processes running lazy vacuum. This is because we *************** vacuum_set_xid_limits(Relation rel, *** 546,553 **** /* * Compute the cutoff XID, being careful not to generate a "permanent" XID */ ! limit = *oldestXmin - freezemin; ! if (!TransactionIdIsNormal(limit)) limit = FirstNormalTransactionId; /* --- 549,558 ---- /* * Compute the cutoff XID, being careful not to generate a "permanent" XID */ ! limit = *oldestXmin; ! if (limit > FirstNormalTransactionId + freezemin) ! limit -= freezemin; ! else limit = FirstNormalTransactionId; /* *************** vacuum_set_xid_limits(Relation rel, *** 555,562 **** * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum * freeze age of zero. */ ! safeLimit = ReadNewTransactionId() - autovacuum_freeze_max_age; ! 
if (!TransactionIdIsNormal(safeLimit)) safeLimit = FirstNormalTransactionId; if (TransactionIdPrecedes(limit, safeLimit)) --- 560,569 ---- * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum * freeze age of zero. */ ! nextXid = ReadNewTransactionId(); ! if (nextXid > FirstNormalTransactionId + autovacuum_freeze_max_age) ! safeLimit = nextXid - autovacuum_freeze_max_age; ! else safeLimit = FirstNormalTransactionId; if (TransactionIdPrecedes(limit, safeLimit)) *************** vacuum_set_xid_limits(Relation rel, *** 574,580 **** * normally autovacuum_multixact_freeze_max_age, but may be less if we are * short of multixact member space. */ ! effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold(); /* * Determine the minimum multixact freeze age to use: as specified by --- 581,587 ---- * normally autovacuum_multixact_freeze_max_age, but may be less if we are * short of multixact member space. */ ! effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age; /* * Determine the minimum multixact freeze age to use: as specified by *************** vacuum_set_xid_limits(Relation rel, *** 590,608 **** Assert(mxid_freezemin >= 0); /* compute the cutoff multi, being careful to generate a valid value */ ! mxactLimit = GetOldestMultiXactId() - mxid_freezemin; ! if (mxactLimit < FirstMultiXactId) mxactLimit = FirstMultiXactId; ! safeMxactLimit = ! ReadNextMultiXactId() - effective_multixact_freeze_max_age; ! if (safeMxactLimit < FirstMultiXactId) safeMxactLimit = FirstMultiXactId; if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit)) { ereport(WARNING, ! (errmsg("oldest multixact is far in the past"), errhint("Close open transactions with multixacts soon to avoid wraparound problems."))); mxactLimit = safeMxactLimit; } --- 597,624 ---- Assert(mxid_freezemin >= 0); /* compute the cutoff multi, being careful to generate a valid value */ ! oldestMxact = GetOldestMultiXactId(); ! if (oldestMxact > FirstMultiXactId + mxid_freezemin) ! 
mxactLimit = oldestMxact - mxid_freezemin; ! else mxactLimit = FirstMultiXactId; ! nextMxactId = ReadNextMultiXactId(); ! if (nextMxactId > FirstMultiXactId + effective_multixact_freeze_max_age) ! safeMxactLimit = nextMxactId - effective_multixact_freeze_max_age; ! else safeMxactLimit = FirstMultiXactId; if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit)) { ereport(WARNING, ! (errmsg("oldest multixact is far in the past: " ! INT64_FORMAT " " INT64_FORMAT " " ! INT64_FORMAT " " INT64_FORMAT " " INT64_FORMAT " " ! INT64_FORMAT " " INT64_FORMAT " " INT64_FORMAT " ", ! multixact_freeze_min_age, vacuum_multixact_freeze_min_age, ! mxactLimit, mxid_freezemin, oldestMxact, ! safeMxactLimit, effective_multixact_freeze_max_age, nextMxactId), errhint("Close open transactions with multixacts soon to avoid wraparound problems."))); mxactLimit = safeMxactLimit; } *************** vacuum_set_xid_limits(Relation rel, *** 632,639 **** * Compute XID limit causing a full-table vacuum, being careful not to * generate a "permanent" XID. */ ! limit = ReadNewTransactionId() - freezetable; ! if (!TransactionIdIsNormal(limit)) limit = FirstNormalTransactionId; *xidFullScanLimit = limit; --- 648,657 ---- * Compute XID limit causing a full-table vacuum, being careful not to * generate a "permanent" XID. */ ! limit = ReadNewTransactionId(); ! if (limit > FirstNormalTransactionId + freezetable) ! limit -= freezetable; ! else limit = FirstNormalTransactionId; *xidFullScanLimit = limit; *************** vacuum_set_xid_limits(Relation rel, *** 657,664 **** * Compute MultiXact limit causing a full-table vacuum, being careful * to generate a valid MultiXact value. */ ! mxactLimit = ReadNextMultiXactId() - freezetable; ! if (mxactLimit < FirstMultiXactId) mxactLimit = FirstMultiXactId; *mxactFullScanLimit = mxactLimit; --- 675,684 ---- * Compute MultiXact limit causing a full-table vacuum, being careful * to generate a valid MultiXact value. */ ! mxactLimit = ReadNextMultiXactId(); ! 
if (mxactLimit > FirstMultiXactId + freezetable) ! mxactLimit -= freezetable; ! else mxactLimit = FirstMultiXactId; *mxactFullScanLimit = mxactLimit; *************** vac_truncate_clog(TransactionId frozenXI *** 1192,1204 **** AdvanceOldestCommitTsXid(frozenXID); /* - * Truncate CLOG, multixact and CommitTs to the oldest computed value. - */ - TruncateCLOG(frozenXID, oldestxid_datoid); - TruncateCommitTs(frozenXID); - TruncateMultiXact(minMulti, minmulti_datoid); - - /* * Update the wrap limit for GetNewTransactionId and creation of new * MultiXactIds. Note: these functions will also signal the postmaster * for an(other) autovac cycle if needed. XXX should we avoid possibly --- 1212,1217 ---- *************** vac_truncate_clog(TransactionId frozenXI *** 1206,1211 **** --- 1219,1231 ---- */ SetTransactionIdLimit(frozenXID, oldestxid_datoid); SetMultiXactIdLimit(minMulti, minmulti_datoid, false); + + /* + * Truncate CLOG, multixact and CommitTs to the oldest computed value. + */ + TruncateCLOG(frozenXID, oldestxid_datoid); + TruncateCommitTs(frozenXID); + TruncateMultiXact(minMulti, minmulti_datoid); } diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c new file mode 100644 index 246b63c..7c19f1d *** a/src/backend/commands/vacuumlazy.c --- b/src/backend/commands/vacuumlazy.c *************** lazy_vacuum_rel(Relation onerel, int opt *** 384,390 **** vacrelstats->pinskipped_pages, vacrelstats->frozenskipped_pages); appendStringInfo(&buf, ! _("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable, oldest xmin: %u\n"), vacrelstats->tuples_deleted, vacrelstats->new_rel_tuples, vacrelstats->new_dead_tuples, --- 384,390 ---- vacrelstats->pinskipped_pages, vacrelstats->frozenskipped_pages); appendStringInfo(&buf, ! 
_("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable, oldest xmin: " XID_FMT "\n"), vacrelstats->tuples_deleted, vacrelstats->new_rel_tuples, vacrelstats->new_dead_tuples, *************** lazy_scan_heap(Relation onerel, int opti *** 858,863 **** --- 858,864 ---- (errmsg("relation \"%s\" page %u is uninitialized --- fixing", relname, blkno))); PageInit(page, BufferGetPageSize(buf), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; HeapPageGetSpecial(page)->pd_magic = HEAP_PAGE_MAGIC; empty_pages++; } *************** lazy_scan_heap(Relation onerel, int opti *** 914,920 **** * We count tuples removed by the pruning step as removed by VACUUM. */ tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false, ! &vacrelstats->latestRemovedXid); /* * Now scan the page to collect vacuumable items and check for tuples --- 915,921 ---- * We count tuples removed by the pruning step as removed by VACUUM. */ tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false, ! &vacrelstats->latestRemovedXid, true); /* * Now scan the page to collect vacuumable items and check for tuples *************** lazy_scan_heap(Relation onerel, int opti *** 973,978 **** --- 974,980 ---- tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(onerel); + HeapTupleCopyBaseFromPage(&tuple, page); tupgone = false; *************** lazy_scan_heap(Relation onerel, int opti *** 1032,1038 **** * The inserter definitely committed. But is it old * enough that everyone sees it as committed? */ ! xmin = HeapTupleHeaderGetXmin(tuple.t_data); if (!TransactionIdPrecedes(xmin, OldestXmin)) { all_visible = false; --- 1034,1040 ---- * The inserter definitely committed. But is it old * enough that everyone sees it as committed? */ ! 
xmin = HeapTupleGetXmin(&tuple); if (!TransactionIdPrecedes(xmin, OldestXmin)) { all_visible = false; *************** lazy_scan_heap(Relation onerel, int opti *** 1069,1075 **** if (tupgone) { lazy_record_dead_tuple(vacrelstats, &(tuple.t_self)); ! HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data, &vacrelstats->latestRemovedXid); tups_vacuumed += 1; has_dead_tuples = true; --- 1071,1077 ---- if (tupgone) { lazy_record_dead_tuple(vacrelstats, &(tuple.t_self)); ! HeapTupleHeaderAdvanceLatestRemovedXid(&tuple, &vacrelstats->latestRemovedXid); tups_vacuumed += 1; has_dead_tuples = true; *************** lazy_scan_heap(Relation onerel, int opti *** 1085,1091 **** * Each non-removable tuple must be checked to see if it needs * freezing. Note we already have exclusive buffer lock. */ ! if (heap_prepare_freeze_tuple(tuple.t_data, FreezeLimit, MultiXactCutoff, &frozen[nfrozen], &tuple_totally_frozen)) frozen[nfrozen++].offset = offnum; --- 1087,1093 ---- * Each non-removable tuple must be checked to see if it needs * freezing. Note we already have exclusive buffer lock. */ ! if (heap_prepare_freeze_tuple(&tuple, FreezeLimit, MultiXactCutoff, &frozen[nfrozen], &tuple_totally_frozen)) frozen[nfrozen++].offset = offnum; *************** lazy_scan_heap(Relation onerel, int opti *** 1115,1121 **** itemid = PageGetItemId(page, frozen[i].offset); htup = (HeapTupleHeader) PageGetItem(page, itemid); ! heap_execute_freeze_tuple(htup, &frozen[i]); } /* Now WAL-log freezing if necessary */ --- 1117,1123 ---- itemid = PageGetItemId(page, frozen[i].offset); htup = (HeapTupleHeader) PageGetItem(page, itemid); ! heap_execute_freeze_tuple_page(page, htup, &frozen[i]); } /* Now WAL-log freezing if necessary */ *************** lazy_scan_heap(Relation onerel, int opti *** 1336,1342 **** */ initStringInfo(&buf); appendStringInfo(&buf, ! 
_("%.0f dead row versions cannot be removed yet, oldest xmin: %u\n"), nkeep, OldestXmin); appendStringInfo(&buf, _("There were %.0f unused item pointers.\n"), nunused); --- 1338,1344 ---- */ initStringInfo(&buf); appendStringInfo(&buf, ! _("%.0f dead row versions cannot be removed yet, oldest xmin: " XID_FMT "\n"), nkeep, OldestXmin); appendStringInfo(&buf, _("There were %.0f unused item pointers.\n"), nunused); *************** lazy_check_needs_freeze(Buffer buf, bool *** 1562,1567 **** --- 1564,1570 ---- offnum = OffsetNumberNext(offnum)) { ItemId itemid; + HeapTupleData htup; itemid = PageGetItemId(page, offnum); *************** lazy_check_needs_freeze(Buffer buf, bool *** 1575,1581 **** tupleheader = (HeapTupleHeader) PageGetItem(page, itemid); ! if (heap_tuple_needs_freeze(tupleheader, FreezeLimit, MultiXactCutoff, buf)) return true; } /* scan along page */ --- 1578,1587 ---- tupleheader = (HeapTupleHeader) PageGetItem(page, itemid); ! htup.t_data = tupleheader; ! HeapTupleCopyBaseFromPage(&htup, page); ! ! if (heap_tuple_needs_freeze(&htup, FreezeLimit, MultiXactCutoff, buf)) return true; } /* scan along page */ *************** heap_page_is_all_visible(Relation rel, B *** 2140,2145 **** --- 2146,2152 ---- tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyBaseFromPage(&tuple, page); switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf)) { *************** heap_page_is_all_visible(Relation rel, B *** 2159,2165 **** * The inserter definitely committed. But is it old enough * that everyone sees it as committed? */ ! xmin = HeapTupleHeaderGetXmin(tuple.t_data); if (!TransactionIdPrecedes(xmin, OldestXmin)) { all_visible = false; --- 2166,2172 ---- * The inserter definitely committed. But is it old enough * that everyone sees it as committed? */ ! 
xmin = HeapTupleGetXmin(&tuple); if (!TransactionIdPrecedes(xmin, OldestXmin)) { all_visible = false; diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c new file mode 100644 index bd8a15d..47661fa *** a/src/backend/executor/execExprInterp.c --- b/src/backend/executor/execExprInterp.c *************** ExecEvalFieldStoreDeForm(ExprState *stat *** 2567,2572 **** --- 2567,2573 ---- tmptup.t_len = HeapTupleHeaderGetDatumLength(tuphdr); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tmptup); tmptup.t_data = tuphdr; heap_deform_tuple(&tmptup, tupDesc, diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c new file mode 100644 index 4b594d4..824dfcf *** a/src/backend/executor/execMain.c --- b/src/backend/executor/execMain.c *************** EvalPlanQualFetch(EState *estate, Relati *** 2595,2601 **** * atomic, and Xmin never changes in an existing tuple, except to * invalid or frozen, and neither of those can match priorXmax.) */ ! if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data), priorXmax)) { ReleaseBuffer(buffer); --- 2595,2601 ---- * atomic, and Xmin never changes in an existing tuple, except to * invalid or frozen, and neither of those can match priorXmax.) */ ! if (!TransactionIdEquals(HeapTupleGetXmin(&tuple), priorXmax)) { ReleaseBuffer(buffer); *************** EvalPlanQualFetch(EState *estate, Relati *** 2743,2749 **** /* * As above, if xmin isn't what we're expecting, do nothing. */ ! if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data), priorXmax)) { ReleaseBuffer(buffer); --- 2743,2749 ---- /* * As above, if xmin isn't what we're expecting, do nothing. */ ! 
if (!TransactionIdEquals(HeapTupleGetXmin(&tuple), priorXmax)) { ReleaseBuffer(buffer); *************** EvalPlanQualFetch(EState *estate, Relati *** 2772,2778 **** /* updated, so look at the updated row */ tuple.t_self = tuple.t_data->t_ctid; /* updated row should have xmin matching this xmax */ ! priorXmax = HeapTupleHeaderGetUpdateXid(tuple.t_data); ReleaseBuffer(buffer); /* loop back to fetch next in chain */ } --- 2772,2778 ---- /* updated, so look at the updated row */ tuple.t_self = tuple.t_data->t_ctid; /* updated row should have xmin matching this xmax */ ! priorXmax = HeapTupleGetUpdateXidAny(&tuple); ReleaseBuffer(buffer); /* loop back to fetch next in chain */ } *************** EvalPlanQualFetchRowMarks(EPQState *epqs *** 2982,2987 **** --- 2982,2988 ---- tuple.t_tableOid = erm->relid; /* also copy t_ctid in case there's valid data there */ tuple.t_self = td->t_ctid; + HeapTupleSetZeroBase(&tuple); /* copy and store tuple */ EvalPlanQualSetTuple(epqstate, erm->rti, diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c new file mode 100644 index ee6c4af..2a96573 *** a/src/backend/executor/execUtils.c --- b/src/backend/executor/execUtils.c *************** GetAttributeByNum(HeapTupleHeader tuple, *** 985,990 **** --- 985,991 ---- tmptup.t_len = HeapTupleHeaderGetDatumLength(tuple); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tmptup); tmptup.t_data = tuple; result = heap_getattr(&tmptup, diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c new file mode 100644 index f7e55e0..6bfc688 *** a/src/backend/executor/nodeBitmapHeapscan.c --- b/src/backend/executor/nodeBitmapHeapscan.c *************** BitmapHeapNext(BitmapHeapScanState *node *** 300,305 **** --- 300,306 ---- scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); scan->rs_ctup.t_len = ItemIdGetLength(lp); scan->rs_ctup.t_tableOid = 
scan->rs_rd->rd_id; + HeapTupleCopyBaseFromPage(&scan->rs_ctup, dp); ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset); pgstat_count_heap_fetch(scan->rs_rd); *************** bitgetpage(HeapScanDesc scan, TBMIterate *** 427,432 **** --- 428,434 ---- loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); loctup.t_len = ItemIdGetLength(lp); loctup.t_tableOid = scan->rs_rd->rd_id; + HeapTupleCopyBaseFromPage(&loctup, dp); ItemPointerSet(&loctup.t_self, page, offnum); valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer); if (valid) diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c new file mode 100644 index 49586a3..79b33a4 *** a/src/backend/executor/nodeModifyTable.c --- b/src/backend/executor/nodeModifyTable.c *************** ExecCheckHeapTupleVisible(EState *estate *** 209,215 **** * visible to our snapshot. (This would happen, for example, if * conflicting keys are proposed for insertion in a single command.) */ ! if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data))) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); --- 209,215 ---- * visible to our snapshot. (This would happen, for example, if * conflicting keys are proposed for insertion in a single command.) */ ! if (!TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(tuple))) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to concurrent update"))); *************** ExecOnConflictUpdate(ModifyTableState *m *** 1248,1254 **** * that for SQL MERGE, an exception must be raised in the event of * an attempt to update the same row twice. */ ! 
if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple.t_data))) ereport(ERROR, (errcode(ERRCODE_CARDINALITY_VIOLATION), errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"), --- 1248,1254 ---- * that for SQL MERGE, an exception must be raised in the event of * an attempt to update the same row twice. */ ! if (TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(&tuple))) ereport(ERROR, (errcode(ERRCODE_CARDINALITY_VIOLATION), errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"), *************** ExecModifyTable(PlanState *pstate) *** 1720,1725 **** --- 1720,1726 ---- HeapTupleHeaderGetDatumLength(oldtupdata.t_data); ItemPointerSetInvalid(&(oldtupdata.t_self)); /* Historically, view triggers see invalid t_tableOid. */ + HeapTupleSetZeroBase(&oldtupdata); oldtupdata.t_tableOid = (relkind == RELKIND_VIEW) ? InvalidOid : RelationGetRelid(resultRelInfo->ri_RelationDesc); diff --git a/src/backend/executor/nodeSamplescan.c b/src/backend/executor/nodeSamplescan.c new file mode 100644 index 9c74a83..4079d1b *** a/src/backend/executor/nodeSamplescan.c --- b/src/backend/executor/nodeSamplescan.c *************** tablesample_getnext(SampleScanState *sca *** 456,461 **** --- 456,462 ---- tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple->t_len = ItemIdGetLength(itemid); + HeapTupleCopyBaseFromPage(tuple, page); ItemPointerSet(&(tuple->t_self), blockno, tupoffset); if (all_visible) diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c new file mode 100644 index afe231f..14ac0a3 *** a/src/backend/executor/spi.c --- b/src/backend/executor/spi.c *************** SPI_modifytuple(Relation rel, HeapTuple *** 740,745 **** --- 740,746 ---- mtuple->t_data->t_ctid = tuple->t_data->t_ctid; mtuple->t_self = tuple->t_self; mtuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyBase(mtuple, tuple); if (rel->rd_att->tdhasoid) HeapTupleSetOid(mtuple, HeapTupleGetOid(tuple)); } diff --git 
a/src/backend/executor/tqueue.c b/src/backend/executor/tqueue.c new file mode 100644 index 6afcd1a..16fe0ea *** a/src/backend/executor/tqueue.c --- b/src/backend/executor/tqueue.c *************** TQExamineRecord(TQueueDestReceiver *tque *** 436,441 **** --- 436,442 ---- ItemPointerSetInvalid(&(tdata.t_self)); tdata.t_tableOid = InvalidOid; tdata.t_data = tup; + HeapTupleSetZeroBase(&tdata); heap_deform_tuple(&tdata, tupledesc, values, isnull); /* Recursively check each interesting non-NULL attribute. */ *************** TupleQueueHandleDataMessage(TupleQueueRe *** 752,757 **** --- 753,759 ---- htup.t_tableOid = InvalidOid; htup.t_len = nbytes; htup.t_data = data; + HeapTupleSetZeroBase(&htup); /* * Either just copy the data into a regular palloc'd tuple, or remap it, diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c new file mode 100644 index a1ebd4a..0207c4e *** a/src/backend/optimizer/util/plancat.c --- b/src/backend/optimizer/util/plancat.c *************** get_relation_info(PlannerInfo *root, Oid *** 208,214 **** * src/backend/access/heap/README.HOT for discussion. */ if (index->indcheckxmin && ! !TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRelation->rd_indextuple->t_data), TransactionXmin)) { root->glob->transientPlan = true; --- 208,214 ---- * src/backend/access/heap/README.HOT for discussion. */ if (index->indcheckxmin && ! !TransactionIdPrecedes(HeapTupleGetXmin(indexRelation->rd_indextuple), TransactionXmin)) { root->glob->transientPlan = true; diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c new file mode 100644 index 776b1c0..d7c90fe *** a/src/backend/postmaster/autovacuum.c --- b/src/backend/postmaster/autovacuum.c *************** int autovacuum_vac_thresh; *** 118,125 **** double autovacuum_vac_scale; int autovacuum_anl_thresh; double autovacuum_anl_scale; ! int autovacuum_freeze_max_age; ! 
int autovacuum_multixact_freeze_max_age; int autovacuum_vac_cost_delay; int autovacuum_vac_cost_limit; --- 118,125 ---- double autovacuum_vac_scale; int autovacuum_anl_thresh; double autovacuum_anl_scale; ! int64 autovacuum_freeze_max_age; ! int64 autovacuum_multixact_freeze_max_age; int autovacuum_vac_cost_delay; int autovacuum_vac_cost_limit; *************** static TransactionId recentXid; *** 147,156 **** static MultiXactId recentMulti; /* Default freeze ages to use for autovacuum (varies by database) */ ! static int default_freeze_min_age; ! static int default_freeze_table_age; ! static int default_multixact_freeze_min_age; ! static int default_multixact_freeze_table_age; /* Memory context for long-lived data */ static MemoryContext AutovacMemCxt; --- 147,156 ---- static MultiXactId recentMulti; /* Default freeze ages to use for autovacuum (varies by database) */ ! static int64 default_freeze_min_age; ! static int64 default_freeze_table_age; ! static int64 default_multixact_freeze_min_age; ! static int64 default_multixact_freeze_table_age; /* Memory context for long-lived data */ static MemoryContext AutovacMemCxt; *************** static void FreeWorkerInfo(int code, Dat *** 325,335 **** static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map, TupleDesc pg_class_desc, ! int effective_multixact_freeze_max_age); static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, ! int effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound); static void autovacuum_do_vac_analyze(autovac_table *tab, --- 325,335 ---- static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map, TupleDesc pg_class_desc, ! int64 effective_multixact_freeze_max_age); static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, ! 
int64 effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound); static void autovacuum_do_vac_analyze(autovac_table *tab, *************** do_start_worker(void) *** 1128,1133 **** --- 1128,1134 ---- ListCell *cell; TransactionId xidForceLimit; MultiXactId multiForceLimit; + int64 multiMembersThreshold; bool for_xid_wrap; bool for_multi_wrap; avw_dbase *avdb; *************** do_start_worker(void) *** 1167,1183 **** * particular tables, but not loosened.) */ recentXid = ReadNewTransactionId(); ! xidForceLimit = recentXid - autovacuum_freeze_max_age; ! /* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */ ! /* this can cause the limit to go backwards by 3, but that's OK */ ! if (xidForceLimit < FirstNormalTransactionId) ! xidForceLimit -= FirstNormalTransactionId; /* Also determine the oldest datminmxid we will consider. */ recentMulti = ReadNextMultiXactId(); ! multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold(); ! if (multiForceLimit < FirstMultiXactId) ! multiForceLimit -= FirstMultiXactId; /* * Choose a database to connect to. We pick the database that was least --- 1168,1185 ---- * particular tables, but not loosened.) */ recentXid = ReadNewTransactionId(); ! if (recentXid > FirstNormalTransactionId + autovacuum_freeze_max_age) ! xidForceLimit = recentXid - autovacuum_freeze_max_age; ! else ! xidForceLimit = FirstNormalTransactionId; /* Also determine the oldest datminmxid we will consider. */ recentMulti = ReadNextMultiXactId(); ! multiMembersThreshold = autovacuum_multixact_freeze_max_age; ! if (recentMulti > FirstMultiXactId + multiMembersThreshold) ! multiForceLimit = recentMulti - multiMembersThreshold; ! else ! multiForceLimit = FirstMultiXactId; /* * Choose a database to connect to. We pick the database that was least *************** do_autovacuum(void) *** 1942,1948 **** BufferAccessStrategy bstrategy; ScanKeyData key; TupleDesc pg_class_desc; ! 
int effective_multixact_freeze_max_age; bool did_vacuum = false; bool found_concurrent_worker = false; int i; --- 1944,1950 ---- BufferAccessStrategy bstrategy; ScanKeyData key; TupleDesc pg_class_desc; ! int64 effective_multixact_freeze_max_age; bool did_vacuum = false; bool found_concurrent_worker = false; int i; *************** do_autovacuum(void) *** 1978,1984 **** * normally autovacuum_multixact_freeze_max_age, but may be less if we are * short of multixact member space. */ ! effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold(); /* * Find the pg_database entry and select the default freeze ages. We use --- 1980,1986 ---- * normally autovacuum_multixact_freeze_max_age, but may be less if we are * short of multixact member space. */ ! effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age; /* * Find the pg_database entry and select the default freeze ages. We use *************** get_pgstat_tabentry_relid(Oid relid, boo *** 2742,2748 **** static autovac_table * table_recheck_autovac(Oid relid, HTAB *table_toast_map, TupleDesc pg_class_desc, ! int effective_multixact_freeze_max_age) { Form_pg_class classForm; HeapTuple classTup; --- 2744,2750 ---- static autovac_table * table_recheck_autovac(Oid relid, HTAB *table_toast_map, TupleDesc pg_class_desc, ! int64 effective_multixact_freeze_max_age) { Form_pg_class classForm; HeapTuple classTup; *************** table_recheck_autovac(Oid relid, HTAB *t *** 2798,2807 **** /* OK, it needs something done */ if (doanalyze || dovacuum) { ! int freeze_min_age; ! int freeze_table_age; ! int multixact_freeze_min_age; ! int multixact_freeze_table_age; int vac_cost_limit; int vac_cost_delay; int log_min_duration; --- 2800,2809 ---- /* OK, it needs something done */ if (doanalyze || dovacuum) { ! int64 freeze_min_age; ! int64 freeze_table_age; ! int64 multixact_freeze_min_age; ! 
int64 multixact_freeze_table_age; int vac_cost_limit; int vac_cost_delay; int log_min_duration; *************** relation_needs_vacanalyze(Oid relid, *** 2926,2932 **** AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, ! int effective_multixact_freeze_max_age, /* output params below */ bool *dovacuum, bool *doanalyze, --- 2928,2934 ---- AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, ! int64 effective_multixact_freeze_max_age, /* output params below */ bool *dovacuum, bool *doanalyze, *************** relation_needs_vacanalyze(Oid relid, *** 2951,2958 **** anltuples; /* freeze parameters */ ! int freeze_max_age; ! int multixact_freeze_max_age; TransactionId xidForceLimit; MultiXactId multiForceLimit; --- 2953,2960 ---- anltuples; /* freeze parameters */ ! int64 freeze_max_age; ! int64 multixact_freeze_max_age; TransactionId xidForceLimit; MultiXactId multiForceLimit; *************** relation_needs_vacanalyze(Oid relid, *** 2993,3009 **** av_enabled = (relopts ? relopts->enabled : true); /* Force vacuum if table is at risk of wraparound */ ! xidForceLimit = recentXid - freeze_max_age; ! if (xidForceLimit < FirstNormalTransactionId) ! xidForceLimit -= FirstNormalTransactionId; force_vacuum = (TransactionIdIsNormal(classForm->relfrozenxid) && TransactionIdPrecedes(classForm->relfrozenxid, xidForceLimit)); if (!force_vacuum) { ! multiForceLimit = recentMulti - multixact_freeze_max_age; ! if (multiForceLimit < FirstMultiXactId) ! multiForceLimit -= FirstMultiXactId; force_vacuum = MultiXactIdPrecedes(classForm->relminmxid, multiForceLimit); } --- 2995,3013 ---- av_enabled = (relopts ? relopts->enabled : true); /* Force vacuum if table is at risk of wraparound */ ! if (recentXid > FirstNormalTransactionId + freeze_max_age) ! xidForceLimit = recentXid - freeze_max_age; ! else ! 
xidForceLimit = FirstNormalTransactionId; force_vacuum = (TransactionIdIsNormal(classForm->relfrozenxid) && TransactionIdPrecedes(classForm->relfrozenxid, xidForceLimit)); if (!force_vacuum) { ! if (recentMulti > FirstMultiXactId + multixact_freeze_max_age) ! multiForceLimit = recentMulti - multixact_freeze_max_age; ! else ! multiForceLimit = FirstMultiXactId; force_vacuum = MultiXactIdPrecedes(classForm->relminmxid, multiForceLimit); } diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c new file mode 100644 index 486fd0c..bbb3f16 *** a/src/backend/replication/logical/decode.c --- b/src/backend/replication/logical/decode.c *************** DecodeHeapOp(LogicalDecodingContext *ctx *** 467,472 **** --- 467,476 ---- /* we don't care about row level locks for now */ break; + case XLOG_HEAP_BASE_SHIFT: + /* we don't care about base shift */ + break; + default: elog(ERROR, "unexpected RM_HEAP_ID record type: %u", info); break; *************** DecodeInsert(LogicalDecodingContext *ctx *** 643,650 **** xl_heap_insert *xlrec; ReorderBufferChange *change; RelFileNode target_node; ! xlrec = (xl_heap_insert *) XLogRecGetData(r); /* only interested in our database */ XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL); --- 647,659 ---- xl_heap_insert *xlrec; ReorderBufferChange *change; RelFileNode target_node; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data; ! rec_data = (Pointer) XLogRecGetData(r); ! if (isinit) ! rec_data += sizeof(TransactionId); ! xlrec = (xl_heap_insert *) rec_data; /* only interested in our database */ XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL); *************** DecodeUpdate(LogicalDecodingContext *ctx *** 695,702 **** ReorderBufferChange *change; char *data; RelFileNode target_node; ! 
xlrec = (xl_heap_update *) XLogRecGetData(r); /* only interested in our database */ XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL); --- 704,716 ---- ReorderBufferChange *change; char *data; RelFileNode target_node; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data; ! rec_data = (Pointer) XLogRecGetData(r); ! if (isinit) ! rec_data += sizeof(TransactionId); ! xlrec = (xl_heap_update *) rec_data; /* only interested in our database */ XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL); *************** DecodeMultiInsert(LogicalDecodingContext *** 820,827 **** char *tupledata; Size tuplelen; RelFileNode rnode; ! xlrec = (xl_heap_multi_insert *) XLogRecGetData(r); /* only interested in our database */ XLogRecGetBlockTag(r, 0, &rnode, NULL, NULL); --- 834,846 ---- char *tupledata; Size tuplelen; RelFileNode rnode; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data; ! rec_data = (Pointer) XLogRecGetData(r); ! if (isinit) ! rec_data += sizeof(TransactionId); ! xlrec = (xl_heap_multi_insert *) rec_data; /* only interested in our database */ XLogRecGetBlockTag(r, 0, &rnode, NULL, NULL); *************** DecodeMultiInsert(LogicalDecodingContext *** 877,882 **** --- 896,902 ---- * transactions. 
*/ tuple->tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&(tuple->tuple)); tuple->tuple.t_len = datalen + SizeofHeapTupleHeader; *************** DecodeXLogTuple(char *data, Size len, Re *** 967,972 **** --- 987,993 ---- /* we can only figure this out after reassembling the transactions */ tuple->tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&(tuple->tuple)); /* data is not stored aligned, copy to aligned storage */ memcpy((char *) &xlhdr, diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c new file mode 100644 index 657bafa..389004b *** a/src/backend/replication/logical/reorderbuffer.c --- b/src/backend/replication/logical/reorderbuffer.c *************** ReorderBufferCommit(ReorderBuffer *rb, T *** 1609,1615 **** /* this is just a sanity check against bad output plugin behaviour */ if (GetCurrentTransactionIdIfAny() != InvalidTransactionId) ! elog(ERROR, "output plugin used XID %u", GetCurrentTransactionId()); /* cleanup */ --- 1609,1615 ---- /* this is just a sanity check against bad output plugin behaviour */ if (GetCurrentTransactionIdIfAny() != InvalidTransactionId) ! elog(ERROR, "output plugin used XID " XID_FMT, GetCurrentTransactionId()); /* cleanup */ *************** ReorderBufferAbortOld(ReorderBuffer *rb, *** 1724,1730 **** if (TransactionIdPrecedes(txn->xid, oldestRunningXid)) { ! elog(DEBUG2, "aborting old transaction %u", txn->xid); /* remove potential on-disk data, and deallocate this tx */ ReorderBufferCleanupTXN(rb, txn); --- 1724,1730 ---- if (TransactionIdPrecedes(txn->xid, oldestRunningXid)) { ! elog(DEBUG2, "aborting old transaction " XID_FMT, txn->xid); /* remove potential on-disk data, and deallocate this tx */ ReorderBufferCleanupTXN(rb, txn); *************** ReorderBufferSerializeTXN(ReorderBuffer *** 2060,2066 **** Size spilled = 0; char path[MAXPGPATH]; ! 
elog(DEBUG2, "spill %u changes in XID %u to disk", (uint32) txn->nentries_mem, txn->xid); /* do the same to all child TXs */ --- 2060,2066 ---- Size spilled = 0; char path[MAXPGPATH]; ! elog(DEBUG2, "spill %u changes in XID " XID_FMT " to disk", (uint32) txn->nentries_mem, txn->xid); /* do the same to all child TXs */ *************** ReorderBufferSerializeTXN(ReorderBuffer *** 2097,2103 **** * No need to care about TLIs here, only used during a single run, * so each LSN only maps to a specific WAL record. */ ! sprintf(path, "pg_replslot/%s/xid-%u-lsn-%X-%X.snap", NameStr(MyReplicationSlot->data.name), txn->xid, (uint32) (recptr >> 32), (uint32) recptr); --- 2097,2103 ---- * No need to care about TLIs here, only used during a single run, * so each LSN only maps to a specific WAL record. */ ! sprintf(path, "pg_replslot/%s/xid-" XID_FMT "-lsn-%X-%X.snap", NameStr(MyReplicationSlot->data.name), txn->xid, (uint32) (recptr >> 32), (uint32) recptr); *************** ReorderBufferSerializeChange(ReorderBuff *** 2285,2291 **** errno = save_errno; ereport(ERROR, (errcode_for_file_access(), ! errmsg("could not write to data file for XID %u: %m", txn->xid))); } pgstat_report_wait_end(); --- 2285,2291 ---- errno = save_errno; ereport(ERROR, (errcode_for_file_access(), ! errmsg("could not write to data file for XID " XID_FMT ": %m", txn->xid))); } pgstat_report_wait_end(); *************** ReorderBufferRestoreChanges(ReorderBuffe *** 2344,2350 **** * No need to care about TLIs here, only used during a single run, * so each LSN only maps to a specific WAL record. */ ! sprintf(path, "pg_replslot/%s/xid-%u-lsn-%X-%X.snap", NameStr(MyReplicationSlot->data.name), txn->xid, (uint32) (recptr >> 32), (uint32) recptr); --- 2344,2350 ---- * No need to care about TLIs here, only used during a single run, * so each LSN only maps to a specific WAL record. */ ! 
sprintf(path, "pg_replslot/%s/xid-" XID_FMT "-lsn-%X-%X.snap", NameStr(MyReplicationSlot->data.name), txn->xid, (uint32) (recptr >> 32), (uint32) recptr); *************** ReorderBufferRestoreCleanup(ReorderBuffe *** 2586,2592 **** XLogSegNoOffsetToRecPtr(cur, 0, recptr); ! sprintf(path, "pg_replslot/%s/xid-%u-lsn-%X-%X.snap", NameStr(MyReplicationSlot->data.name), txn->xid, (uint32) (recptr >> 32), (uint32) recptr); if (unlink(path) != 0 && errno != ENOENT) --- 2586,2592 ---- XLogSegNoOffsetToRecPtr(cur, 0, recptr); ! sprintf(path, "pg_replslot/%s/xid-" XID_FMT "-lsn-%X-%X.snap", NameStr(MyReplicationSlot->data.name), txn->xid, (uint32) (recptr >> 32), (uint32) recptr); if (unlink(path) != 0 && errno != ENOENT) *************** UpdateLogicalMappings(HTAB *tuplecid_dat *** 3167,3174 **** TransactionId f_mapped_xid; TransactionId f_create_xid; XLogRecPtr f_lsn; ! uint32 f_hi, ! f_lo; RewriteMappingFile *f; if (strcmp(mapping_de->d_name, ".") == 0 || --- 3167,3178 ---- TransactionId f_mapped_xid; TransactionId f_create_xid; XLogRecPtr f_lsn; ! uint32 f_lsn_hi, ! f_lsn_lo, ! f_mapped_xid_hi, ! f_mapped_xid_lo, ! f_create_xid_hi, ! f_create_xid_lo; RewriteMappingFile *f; if (strcmp(mapping_de->d_name, ".") == 0 || *************** UpdateLogicalMappings(HTAB *tuplecid_dat *** 3180,3190 **** continue; if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, ! &f_dboid, &f_relid, &f_hi, &f_lo, ! &f_mapped_xid, &f_create_xid) != 6) elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name); ! f_lsn = ((uint64) f_hi) << 32 | f_lo; /* mapping for another database */ if (f_dboid != dboid) --- 3184,3197 ---- continue; if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, ! &f_dboid, &f_relid, &f_lsn_hi, &f_lsn_lo, ! &f_mapped_xid_hi, &f_mapped_xid_lo, ! &f_create_xid_hi, &f_create_xid_lo) != 8) elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name); ! f_lsn = ((uint64) f_lsn_hi) << 32 | f_lsn_lo; ! 
f_mapped_xid = ((uint64) f_mapped_xid_hi) << 32 | f_mapped_xid_lo; ! f_create_xid = ((uint64) f_create_xid_hi) << 32 | f_create_xid_lo; /* mapping for another database */ if (f_dboid != dboid) *************** UpdateLogicalMappings(HTAB *tuplecid_dat *** 3226,3232 **** { RewriteMappingFile *f = files_a[off]; ! elog(DEBUG1, "applying mapping: \"%s\" in %u", f->fname, snapshot->subxip[0]); ApplyLogicalMappingFile(tuplecid_data, relid, f->fname); pfree(f); --- 3233,3239 ---- { RewriteMappingFile *f = files_a[off]; ! elog(DEBUG1, "applying mapping: \"%s\" in " XID_FMT, f->fname, snapshot->subxip[0]); ApplyLogicalMappingFile(tuplecid_data, relid, f->fname); pfree(f); diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c new file mode 100644 index fba57a0..5402b59 *** a/src/backend/replication/logical/snapbuild.c --- b/src/backend/replication/logical/snapbuild.c *************** SnapBuildDistributeNewCatalogSnapshot(Sn *** 837,843 **** if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, txn->xid)) continue; ! elog(DEBUG2, "adding a new snapshot to %u at %X/%X", txn->xid, (uint32) (lsn >> 32), (uint32) lsn); /* --- 837,843 ---- if (!ReorderBufferXidHasBaseSnapshot(builder->reorder, txn->xid)) continue; ! elog(DEBUG2, "adding a new snapshot to " XID_FMT " at %X/%X", txn->xid, (uint32) (lsn >> 32), (uint32) lsn); /* *************** SnapBuildPurgeCommittedTxn(SnapBuild *bu *** 912,918 **** memcpy(builder->committed.xip, workspace, surviving_xids * sizeof(TransactionId)); ! elog(DEBUG3, "purged committed transactions from %u to %u, xmin: %u, xmax: %u", (uint32) builder->committed.xcnt, (uint32) surviving_xids, builder->xmin, builder->xmax); builder->committed.xcnt = surviving_xids; --- 912,918 ---- memcpy(builder->committed.xip, workspace, surviving_xids * sizeof(TransactionId)); ! 
elog(DEBUG3, "purged committed transactions from %u to %u, xmin: " XID_FMT ", xmax: " XID_FMT, (uint32) builder->committed.xcnt, (uint32) surviving_xids, builder->xmin, builder->xmax); builder->committed.xcnt = surviving_xids; *************** SnapBuildCommitTxn(SnapBuild *builder, X *** 978,984 **** sub_needs_timetravel = true; needs_snapshot = true; ! elog(DEBUG1, "found subtransaction %u:%u with catalog changes", xid, subxid); SnapBuildAddCommittedTxn(builder, subxid); --- 978,984 ---- sub_needs_timetravel = true; needs_snapshot = true; ! elog(DEBUG1, "found subtransaction " XID_FMT ":" XID_FMT " with catalog changes", xid, subxid); SnapBuildAddCommittedTxn(builder, subxid); *************** SnapBuildCommitTxn(SnapBuild *builder, X *** 1004,1010 **** /* if top-level modified catalog, it'll need a snapshot */ if (ReorderBufferXidHasCatalogChanges(builder->reorder, xid)) { ! elog(DEBUG2, "found top level transaction %u, with catalog changes", xid); needs_snapshot = true; needs_timetravel = true; --- 1004,1010 ---- /* if top-level modified catalog, it'll need a snapshot */ if (ReorderBufferXidHasCatalogChanges(builder->reorder, xid)) { ! elog(DEBUG2, "found top level transaction " XID_FMT ", with catalog changes", xid); needs_snapshot = true; needs_timetravel = true; *************** SnapBuildCommitTxn(SnapBuild *builder, X *** 1017,1023 **** } else if (needs_timetravel) { ! elog(DEBUG2, "forced transaction %u to do timetravel", xid); SnapBuildAddCommittedTxn(builder, xid); } --- 1017,1023 ---- } else if (needs_timetravel) { ! elog(DEBUG2, "forced transaction " XID_FMT " to do timetravel", xid); SnapBuildAddCommittedTxn(builder, xid); } *************** SnapBuildProcessRunningXacts(SnapBuild * *** 1126,1132 **** /* Remove transactions we don't need to keep track off anymore */ SnapBuildPurgeCommittedTxn(builder); ! 
elog(DEBUG3, "xmin: %u, xmax: %u, oldestrunning: %u", builder->xmin, builder->xmax, running->oldestRunningXid); --- 1126,1132 ---- /* Remove transactions we don't need to keep track off anymore */ SnapBuildPurgeCommittedTxn(builder); ! elog(DEBUG3, "xmin: " XID_FMT ", xmax: " XID_FMT ", oldestrunning: " XID_FMT, builder->xmin, builder->xmax, running->oldestRunningXid); *************** SnapBuildFindSnapshot(SnapBuild *builder *** 1222,1228 **** ereport(DEBUG1, (errmsg_internal("skipping snapshot at %X/%X while building logical decoding snapshot, xmin horizon too low", (uint32) (lsn >> 32), (uint32) lsn), ! errdetail_internal("initial xmin horizon of %u vs the snapshot's %u", builder->initial_xmin_horizon, running->oldestRunningXid))); --- 1222,1228 ---- ereport(DEBUG1, (errmsg_internal("skipping snapshot at %X/%X while building logical decoding snapshot, xmin horizon too low", (uint32) (lsn >> 32), (uint32) lsn), ! errdetail_internal("initial xmin horizon of " XID_FMT " vs the snapshot's " XID_FMT, builder->initial_xmin_horizon, running->oldestRunningXid))); *************** SnapBuildFindSnapshot(SnapBuild *builder *** 1305,1311 **** ereport(LOG, (errmsg("logical decoding found initial starting point at %X/%X", (uint32) (lsn >> 32), (uint32) lsn), ! errdetail("Waiting for transactions (approximately %d) older than %u to end.", running->xcnt, running->nextXid))); SnapBuildWaitSnapshot(running, running->nextXid); --- 1305,1311 ---- ereport(LOG, (errmsg("logical decoding found initial starting point at %X/%X", (uint32) (lsn >> 32), (uint32) lsn), ! errdetail("Waiting for transactions (approximately %d) older than " XID_FMT " to end.", running->xcnt, running->nextXid))); SnapBuildWaitSnapshot(running, running->nextXid); *************** SnapBuildFindSnapshot(SnapBuild *builder *** 1329,1335 **** ereport(LOG, (errmsg("logical decoding found initial consistent point at %X/%X", (uint32) (lsn >> 32), (uint32) lsn), ! 
errdetail("Waiting for transactions (approximately %d) older than %u to end.", running->xcnt, running->nextXid))); SnapBuildWaitSnapshot(running, running->nextXid); --- 1329,1335 ---- ereport(LOG, (errmsg("logical decoding found initial consistent point at %X/%X", (uint32) (lsn >> 32), (uint32) lsn), ! errdetail("Waiting for transactions (approximately %d) older than " XID_FMT " to end.", running->xcnt, running->nextXid))); SnapBuildWaitSnapshot(running, running->nextXid); diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c new file mode 100644 index ea9d21a..1bc3806 *** a/src/backend/replication/walreceiver.c --- b/src/backend/replication/walreceiver.c *************** static void *** 1174,1182 **** XLogWalRcvSendHSFeedback(bool immed) { TimestampTz now; - TransactionId nextXid; - uint32 xmin_epoch, - catalog_xmin_epoch; TransactionId xmin, catalog_xmin; static TimestampTz sendTime = 0; --- 1174,1179 ---- *************** XLogWalRcvSendHSFeedback(bool immed) *** 1248,1275 **** catalog_xmin = InvalidTransactionId; } ! /* ! * Get epoch and adjust if nextXid and oldestXmin are different sides of ! * the epoch boundary. ! */ ! GetNextXidAndEpoch(&nextXid, &xmin_epoch); ! catalog_xmin_epoch = xmin_epoch; ! if (nextXid < xmin) ! xmin_epoch--; ! if (nextXid < catalog_xmin) ! catalog_xmin_epoch--; ! ! elog(DEBUG2, "sending hot standby feedback xmin %u epoch %u catalog_xmin %u catalog_xmin_epoch %u", ! xmin, xmin_epoch, catalog_xmin, catalog_xmin_epoch); /* Construct the message and send it. */ resetStringInfo(&reply_message); pq_sendbyte(&reply_message, 'h'); pq_sendint64(&reply_message, GetCurrentTimestamp()); ! pq_sendint(&reply_message, xmin, 4); ! pq_sendint(&reply_message, xmin_epoch, 4); ! pq_sendint(&reply_message, catalog_xmin, 4); ! 
pq_sendint(&reply_message, catalog_xmin_epoch, 4); walrcv_send(wrconn, reply_message.data, reply_message.len); if (TransactionIdIsValid(xmin) || TransactionIdIsValid(catalog_xmin)) master_has_standby_xmin = true; --- 1245,1259 ---- catalog_xmin = InvalidTransactionId; } ! elog(DEBUG2, "sending hot standby feedback xmin " XID_FMT " catalog_xmin " XID_FMT, ! xmin, catalog_xmin); /* Construct the message and send it. */ resetStringInfo(&reply_message); pq_sendbyte(&reply_message, 'h'); pq_sendint64(&reply_message, GetCurrentTimestamp()); ! pq_sendint64(&reply_message, xmin); ! pq_sendint64(&reply_message, catalog_xmin); walrcv_send(wrconn, reply_message.data, reply_message.len); if (TransactionIdIsValid(xmin) || TransactionIdIsValid(catalog_xmin)) master_has_standby_xmin = true; diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c new file mode 100644 index 1fbe8ed..5b00e7c *** a/src/backend/replication/walsender.c --- b/src/backend/replication/walsender.c *************** static void WalSndUpdateProgress(Logical *** 249,255 **** static XLogRecPtr WalSndWaitForWal(XLogRecPtr loc); static void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time); static TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now); - static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch); static void XLogRead(char *buf, XLogRecPtr startptr, Size count); --- 249,254 ---- *************** PhysicalReplicationSlotNewXmin(Transacti *** 1866,1915 **** } /* - * Check that the provided xmin/epoch are sane, that is, not in the future - * and not so far back as to be already wrapped around. - * - * Epoch of nextXid should be same as standby, or if the counter has - * wrapped, then one greater than standby. - * - * This check doesn't care about whether clog exists for these xids - * at all. 
- */ - static bool - TransactionIdInRecentPast(TransactionId xid, uint32 epoch) - { - TransactionId nextXid; - uint32 nextEpoch; - - GetNextXidAndEpoch(&nextXid, &nextEpoch); - - if (xid <= nextXid) - { - if (epoch != nextEpoch) - return false; - } - else - { - if (epoch + 1 != nextEpoch) - return false; - } - - if (!TransactionIdPrecedesOrEquals(xid, nextXid)) - return false; /* epoch OK, but it's wrapped around */ - - return true; - } - - /* * Hot Standby feedback */ static void ProcessStandbyHSFeedbackMessage(void) { TransactionId feedbackXmin; - uint32 feedbackEpoch; TransactionId feedbackCatalogXmin; - uint32 feedbackCatalogEpoch; /* * Decipher the reply message. The caller already consumed the msgtype --- 1865,1877 ---- *************** ProcessStandbyHSFeedbackMessage(void) *** 1917,1932 **** * of this message. */ (void) pq_getmsgint64(&reply_message); /* sendTime; not used ATM */ ! feedbackXmin = pq_getmsgint(&reply_message, 4); ! feedbackEpoch = pq_getmsgint(&reply_message, 4); ! feedbackCatalogXmin = pq_getmsgint(&reply_message, 4); ! feedbackCatalogEpoch = pq_getmsgint(&reply_message, 4); ! elog(DEBUG2, "hot standby feedback xmin %u epoch %u, catalog_xmin %u epoch %u", feedbackXmin, ! feedbackEpoch, ! feedbackCatalogXmin, ! feedbackCatalogEpoch); /* * Unset WalSender's xmins if the feedback message values are invalid. --- 1879,1890 ---- * of this message. */ (void) pq_getmsgint64(&reply_message); /* sendTime; not used ATM */ ! feedbackXmin = pq_getmsgint64(&reply_message); ! feedbackCatalogXmin = pq_getmsgint64(&reply_message); ! elog(DEBUG2, "hot standby feedback xmin " XID_FMT " catalog_xmin " XID_FMT, feedbackXmin, ! feedbackCatalogXmin); /* * Unset WalSender's xmins if the feedback message values are invalid. *************** ProcessStandbyHSFeedbackMessage(void) *** 1942,1959 **** } /* - * Check that the provided xmin/epoch are sane, that is, not in the future - * and not so far back as to be already wrapped around. Ignore if not. 
- */ - if (TransactionIdIsNormal(feedbackXmin) && - !TransactionIdInRecentPast(feedbackXmin, feedbackEpoch)) - return; - - if (TransactionIdIsNormal(feedbackCatalogXmin) && - !TransactionIdInRecentPast(feedbackCatalogXmin, feedbackCatalogEpoch)) - return; - - /* * Set the WalSender's xmin equal to the standby's requested xmin, so that * the xmin will be taken into account by GetOldestXmin. This will hold * back the removal of dead rows and thereby prevent the generation of --- 1900,1905 ---- diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c new file mode 100644 index ffa6180..400f804 *** a/src/backend/storage/ipc/procarray.c --- b/src/backend/storage/ipc/procarray.c *************** ProcArrayApplyRecoveryInfo(RunningTransa *** 734,740 **** else elog(trace_recovery(DEBUG1), "recovery snapshot waiting for non-overflowed snapshot or " ! "until oldest active xid on standby is at least %u (now %u)", standbySnapshotPendingXmin, running->oldestRunningXid); return; --- 734,740 ---- else elog(trace_recovery(DEBUG1), "recovery snapshot waiting for non-overflowed snapshot or " ! "until oldest active xid on standby is at least " XID_FMT " (now " XID_FMT ")", standbySnapshotPendingXmin, running->oldestRunningXid); return; *************** ProcArrayApplyRecoveryInfo(RunningTransa *** 902,908 **** else elog(trace_recovery(DEBUG1), "recovery snapshot waiting for non-overflowed snapshot or " ! "until oldest active xid on standby is at least %u (now %u)", standbySnapshotPendingXmin, running->oldestRunningXid); } --- 902,908 ---- else elog(trace_recovery(DEBUG1), "recovery snapshot waiting for non-overflowed snapshot or " ! "until oldest active xid on standby is at least " XID_FMT " (now " XID_FMT ")", standbySnapshotPendingXmin, running->oldestRunningXid); } *************** XidCacheRemoveRunningXids(TransactionId *** 3057,3063 **** * debug warning. */ if (j < 0 && !MyPgXact->overflowed) ! 
elog(WARNING, "did not find subXID %u in MyProc", anxid); } for (j = MyPgXact->nxids - 1; j >= 0; j--) --- 3057,3063 ---- * debug warning. */ if (j < 0 && !MyPgXact->overflowed) ! elog(WARNING, "did not find subXID " XID_FMT " in MyProc", anxid); } for (j = MyPgXact->nxids - 1; j >= 0; j--) *************** XidCacheRemoveRunningXids(TransactionId *** 3070,3076 **** } /* Ordinarily we should have found it, unless the cache has overflowed */ if (j < 0 && !MyPgXact->overflowed) ! elog(WARNING, "did not find subXID %u in MyProc", xid); /* Also advance global latestCompletedXid while holding the lock */ if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, --- 3070,3076 ---- } /* Ordinarily we should have found it, unless the cache has overflowed */ if (j < 0 && !MyPgXact->overflowed) ! elog(WARNING, "did not find subXID " XID_FMT " in MyProc", xid); /* Also advance global latestCompletedXid while holding the lock */ if (TransactionIdPrecedes(ShmemVariableCache->latestCompletedXid, *************** RecordKnownAssignedTransactionIds(Transa *** 3176,3182 **** Assert(TransactionIdIsValid(xid)); Assert(TransactionIdIsValid(latestObservedXid)); ! elog(trace_recovery(DEBUG4), "record known xact %u latestObservedXid %u", xid, latestObservedXid); /* --- 3176,3182 ---- Assert(TransactionIdIsValid(xid)); Assert(TransactionIdIsValid(latestObservedXid)); ! elog(trace_recovery(DEBUG4), "record known xact " XID_FMT " latestObservedXid " XID_FMT, xid, latestObservedXid); /* *************** KnownAssignedXidsRemove(TransactionId xi *** 3676,3682 **** { Assert(TransactionIdIsValid(xid)); ! elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid); /* * Note: we cannot consider it an error to remove an XID that's not --- 3676,3682 ---- { Assert(TransactionIdIsValid(xid)); ! 
elog(trace_recovery(DEBUG4), "remove KnownAssignedXid " XID_FMT, xid); /* * Note: we cannot consider it an error to remove an XID that's not *************** KnownAssignedXidsRemovePreceding(Transac *** 3737,3743 **** return; } ! elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid); /* * Mark entries invalid starting at the tail. Since array is sorted, we --- 3737,3743 ---- return; } ! elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to " XID_FMT, removeXid); /* * Mark entries invalid starting at the tail. Since array is sorted, we *************** KnownAssignedXidsDisplay(int trace_level *** 3926,3932 **** if (KnownAssignedXidsValid[i]) { nxids++; ! appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]); } } --- 3926,3932 ---- if (KnownAssignedXidsValid[i]) { nxids++; ! appendStringInfo(&buf, "[%d]=" XID_FMT " ", i, KnownAssignedXids[i]); } } diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c new file mode 100644 index d491ece..55ccb11 *** a/src/backend/storage/ipc/standby.c --- b/src/backend/storage/ipc/standby.c *************** *** 34,40 **** #include "utils/timestamp.h" /* User-settable GUC parameters */ ! int vacuum_defer_cleanup_age; int max_standby_archive_delay = 30 * 1000; int max_standby_streaming_delay = 30 * 1000; --- 34,40 ---- #include "utils/timestamp.h" /* User-settable GUC parameters */ ! int64 vacuum_defer_cleanup_age; int max_standby_archive_delay = 30 * 1000; int max_standby_streaming_delay = 30 * 1000; *************** StandbyReleaseLocks(TransactionId xid) *** 649,660 **** LOCKTAG locktag; elog(trace_recovery(DEBUG4), ! "releasing recovery lock: xid %u db %u rel %u", lock->xid, lock->dbOid, lock->relOid); SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid); if (!LockRelease(&locktag, AccessExclusiveLock, true)) elog(LOG, ! 
"RecoveryLockList contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u", lock->xid, lock->dbOid, lock->relOid); RecoveryLockList = list_delete_cell(RecoveryLockList, cell, prev); --- 649,660 ---- LOCKTAG locktag; elog(trace_recovery(DEBUG4), ! "releasing recovery lock: xid " XID_FMT " db %u rel %u", lock->xid, lock->dbOid, lock->relOid); SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid); if (!LockRelease(&locktag, AccessExclusiveLock, true)) elog(LOG, ! "RecoveryLockList contains entry for lock no longer recorded by lock manager: xid " XID_FMT " database %u relation %u", lock->xid, lock->dbOid, lock->relOid); RecoveryLockList = list_delete_cell(RecoveryLockList, cell, prev); *************** StandbyReleaseAllLocks(void) *** 704,715 **** next = lnext(cell); elog(trace_recovery(DEBUG4), ! "releasing recovery lock: xid %u db %u rel %u", lock->xid, lock->dbOid, lock->relOid); SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid); if (!LockRelease(&locktag, AccessExclusiveLock, true)) elog(LOG, ! "RecoveryLockList contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u", lock->xid, lock->dbOid, lock->relOid); RecoveryLockList = list_delete_cell(RecoveryLockList, cell, prev); pfree(lock); --- 704,715 ---- next = lnext(cell); elog(trace_recovery(DEBUG4), ! "releasing recovery lock: xid " XID_FMT " db %u rel %u", lock->xid, lock->dbOid, lock->relOid); SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid); if (!LockRelease(&locktag, AccessExclusiveLock, true)) elog(LOG, ! "RecoveryLockList contains entry for lock no longer recorded by lock manager: xid " XID_FMT " database %u relation %u", lock->xid, lock->dbOid, lock->relOid); RecoveryLockList = list_delete_cell(RecoveryLockList, cell, prev); pfree(lock); *************** StandbyReleaseOldLocks(int nxids, Transa *** 765,776 **** if (remove) { elog(trace_recovery(DEBUG4), ! 
"releasing recovery lock: xid %u db %u rel %u", lock->xid, lock->dbOid, lock->relOid); SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid); if (!LockRelease(&locktag, AccessExclusiveLock, true)) elog(LOG, ! "RecoveryLockList contains entry for lock no longer recorded by lock manager: xid %u database %u relation %u", lock->xid, lock->dbOid, lock->relOid); RecoveryLockList = list_delete_cell(RecoveryLockList, cell, prev); pfree(lock); --- 765,776 ---- if (remove) { elog(trace_recovery(DEBUG4), ! "releasing recovery lock: xid " XID_FMT " db %u rel %u", lock->xid, lock->dbOid, lock->relOid); SET_LOCKTAG_RELATION(locktag, lock->dbOid, lock->relOid); if (!LockRelease(&locktag, AccessExclusiveLock, true)) elog(LOG, ! "RecoveryLockList contains entry for lock no longer recorded by lock manager: xid " XID_FMT " database %u relation %u", lock->xid, lock->dbOid, lock->relOid); RecoveryLockList = list_delete_cell(RecoveryLockList, cell, prev); pfree(lock); *************** LogCurrentRunningXacts(RunningTransactio *** 992,998 **** if (CurrRunningXacts->subxid_overflow) elog(trace_recovery(DEBUG2), ! "snapshot of %u running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)", CurrRunningXacts->xcnt, (uint32) (recptr >> 32), (uint32) recptr, CurrRunningXacts->oldestRunningXid, --- 992,998 ---- if (CurrRunningXacts->subxid_overflow) elog(trace_recovery(DEBUG2), ! "snapshot of %u running transactions overflowed (lsn %X/%X oldest xid " XID_FMT " latest complete " XID_FMT " next xid " XID_FMT ")", CurrRunningXacts->xcnt, (uint32) (recptr >> 32), (uint32) recptr, CurrRunningXacts->oldestRunningXid, *************** LogCurrentRunningXacts(RunningTransactio *** 1000,1006 **** CurrRunningXacts->nextXid); else elog(trace_recovery(DEBUG2), ! 
"snapshot of %u+%u running transaction ids (lsn %X/%X oldest xid %u latest complete %u next xid %u)", CurrRunningXacts->xcnt, CurrRunningXacts->subxcnt, (uint32) (recptr >> 32), (uint32) recptr, CurrRunningXacts->oldestRunningXid, --- 1000,1006 ---- CurrRunningXacts->nextXid); else elog(trace_recovery(DEBUG2), ! "snapshot of %u+%u running transaction ids (lsn %X/%X oldest xid " XID_FMT " latest complete " XID_FMT " next xid " XID_FMT ")", CurrRunningXacts->xcnt, CurrRunningXacts->subxcnt, (uint32) (recptr >> 32), (uint32) recptr, CurrRunningXacts->oldestRunningXid, diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c new file mode 100644 index 251a359..a3a1f04 *** a/src/backend/storage/lmgr/predicate.c --- b/src/backend/storage/lmgr/predicate.c *************** static void SetPossibleUnsafeConflict(SE *** 426,432 **** static void ReleaseRWConflict(RWConflict conflict); static void FlagSxactUnsafe(SERIALIZABLEXACT *sxact); - static bool OldSerXidPagePrecedesLogically(int p, int q); static void OldSerXidInit(void); static void OldSerXidAdd(TransactionId xid, SerCommitSeqNo minConflictCommitSeqNo); static SerCommitSeqNo OldSerXidGetMinConflictCommitSeqNo(TransactionId xid); --- 426,431 ---- *************** FlagSxactUnsafe(SERIALIZABLEXACT *sxact) *** 767,798 **** } } - /*------------------------------------------------------------------------*/ - - /* - * We will work on the page range of 0..OLDSERXID_MAX_PAGE. - * Compares using wraparound logic, as is required by slru.c. - */ - static bool - OldSerXidPagePrecedesLogically(int p, int q) - { - int diff; - - /* - * We have to compare modulo (OLDSERXID_MAX_PAGE+1)/2. Both inputs should - * be in the range 0..OLDSERXID_MAX_PAGE. 
- */ - Assert(p >= 0 && p <= OLDSERXID_MAX_PAGE); - Assert(q >= 0 && q <= OLDSERXID_MAX_PAGE); - - diff = p - q; - if (diff >= ((OLDSERXID_MAX_PAGE + 1) / 2)) - diff -= OLDSERXID_MAX_PAGE + 1; - else if (diff < -((int) (OLDSERXID_MAX_PAGE + 1) / 2)) - diff += OLDSERXID_MAX_PAGE + 1; - return diff < 0; - } - /* * Initialize for the tracking of old serializable committed xids. */ --- 766,771 ---- *************** OldSerXidInit(void) *** 804,810 **** /* * Set up SLRU management of the pg_serial data. */ - OldSerXidSlruCtl->PagePrecedes = OldSerXidPagePrecedesLogically; SimpleLruInit(OldSerXidSlruCtl, "oldserxid", NUM_OLDSERXID_BUFFERS, 0, OldSerXidLock, "pg_serial", LWTRANCHE_OLDSERXID_BUFFERS); --- 777,782 ---- *************** OldSerXidAdd(TransactionId xid, SerCommi *** 872,879 **** else { firstZeroPage = OldSerXidNextPage(oldSerXidControl->headPage); ! isNewPage = OldSerXidPagePrecedesLogically(oldSerXidControl->headPage, ! targetPage); } if (!TransactionIdIsValid(oldSerXidControl->headXid) --- 844,850 ---- else { firstZeroPage = OldSerXidNextPage(oldSerXidControl->headPage); ! isNewPage = oldSerXidControl->headPage < targetPage; } if (!TransactionIdIsValid(oldSerXidControl->headXid) *************** PredicateLockTuple(Relation relation, He *** 2556,2562 **** { TransactionId myxid; ! targetxmin = HeapTupleHeaderGetXmin(tuple->t_data); myxid = GetTopTransactionIdIfAny(); if (TransactionIdIsValid(myxid)) --- 2527,2533 ---- { TransactionId myxid; ! targetxmin = HeapTupleGetXmin(tuple); myxid = GetTopTransactionIdIfAny(); if (TransactionIdIsValid(myxid)) *************** CheckForSerializableConflictOut(bool vis *** 3978,3995 **** case HEAPTUPLE_LIVE: if (visible) return; ! xid = HeapTupleHeaderGetXmin(tuple->t_data); break; case HEAPTUPLE_RECENTLY_DEAD: if (!visible) return; ! xid = HeapTupleHeaderGetUpdateXid(tuple->t_data); break; case HEAPTUPLE_DELETE_IN_PROGRESS: ! xid = HeapTupleHeaderGetUpdateXid(tuple->t_data); break; case HEAPTUPLE_INSERT_IN_PROGRESS: ! 
xid = HeapTupleHeaderGetXmin(tuple->t_data); break; case HEAPTUPLE_DEAD: return; --- 3949,3966 ---- case HEAPTUPLE_LIVE: if (visible) return; ! xid = HeapTupleGetXmin(tuple); break; case HEAPTUPLE_RECENTLY_DEAD: if (!visible) return; ! xid = HeapTupleGetUpdateXidAny(tuple); break; case HEAPTUPLE_DELETE_IN_PROGRESS: ! xid = HeapTupleGetUpdateXidAny(tuple); break; case HEAPTUPLE_INSERT_IN_PROGRESS: ! xid = HeapTupleGetXmin(tuple); break; case HEAPTUPLE_DEAD: return; *************** CheckForSerializableConflictOut(bool vis *** 4050,4056 **** ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to read/write dependencies among transactions"), ! errdetail_internal("Reason code: Canceled on conflict out to old pivot %u.", xid), errhint("The transaction might succeed if retried."))); if (SxactHasSummaryConflictIn(MySerializableXact) --- 4021,4027 ---- ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to read/write dependencies among transactions"), ! errdetail_internal("Reason code: Canceled on conflict out to old pivot " XID_FMT ".", xid), errhint("The transaction might succeed if retried."))); if (SxactHasSummaryConflictIn(MySerializableXact) *************** CheckForSerializableConflictOut(bool vis *** 4058,4064 **** ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to read/write dependencies among transactions"), ! errdetail_internal("Reason code: Canceled on identification as a pivot, with conflict out to old committed transaction %u.", xid), errhint("The transaction might succeed if retried."))); MySerializableXact->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT; --- 4029,4035 ---- ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to read/write dependencies among transactions"), ! 
errdetail_internal("Reason code: Canceled on identification as a pivot, with conflict out to old committed transaction " XID_FMT ".", xid), errhint("The transaction might succeed if retried."))); MySerializableXact->flags |= SXACT_FLAG_SUMMARY_CONFLICT_OUT; *************** OnConflict_CheckForSerializationFailure( *** 4685,4691 **** ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to read/write dependencies among transactions"), ! errdetail_internal("Reason code: Canceled on conflict out to pivot %u, during read.", writer->topXid), errhint("The transaction might succeed if retried."))); } writer->flags |= SXACT_FLAG_DOOMED; --- 4656,4662 ---- ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("could not serialize access due to read/write dependencies among transactions"), ! errdetail_internal("Reason code: Canceled on conflict out to pivot " XID_FMT ", during read.", writer->topXid), errhint("The transaction might succeed if retried."))); } writer->flags |= SXACT_FLAG_DOOMED; diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c new file mode 100644 index 7a19d51..ef2b8ca *** a/src/backend/storage/page/bufpage.c --- b/src/backend/storage/page/bufpage.c *************** *** 20,25 **** --- 20,26 ---- #include "storage/checksum.h" #include "utils/memdebug.h" #include "utils/memutils.h" + #include "utils/snapmgr.h" /* GUC variable */ diff --git a/src/backend/utils/adt/enum.c b/src/backend/utils/adt/enum.c new file mode 100644 index 973397c..90d0a61 *** a/src/backend/utils/adt/enum.c --- b/src/backend/utils/adt/enum.c *************** check_safe_enum_use(HeapTuple enumval_tu *** 71,77 **** * Usually, a row would get hinted as committed when it's read or loaded * into syscache; but just in case not, let's check the xmin directly. */ ! 
xmin = HeapTupleHeaderGetXmin(enumval_tup->t_data); if (!TransactionIdIsInProgress(xmin) && TransactionIdDidCommit(xmin)) return; --- 71,77 ---- * Usually, a row would get hinted as committed when it's read or loaded * into syscache; but just in case not, let's check the xmin directly. */ ! xmin = HeapTupleGetXmin(enumval_tup); if (!TransactionIdIsInProgress(xmin) && TransactionIdDidCommit(xmin)) return; *************** check_safe_enum_use(HeapTuple enumval_tu *** 98,104 **** * think it's too new and throw an unnecessary error, but we won't allow * an unsafe case.) */ ! if (xmin == HeapTupleHeaderGetXmin(enumtyp_tup->t_data) && !(enumtyp_tup->t_data->t_infomask & HEAP_UPDATED)) { /* same (sub)transaction, so safe */ --- 98,104 ---- * think it's too new and throw an unnecessary error, but we won't allow * an unsafe case.) */ ! if (xmin == HeapTupleGetXmin(enumtyp_tup) && !(enumtyp_tup->t_data->t_infomask & HEAP_UPDATED)) { /* same (sub)transaction, so safe */ diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c new file mode 100644 index 68feeb2..1b7a8c5 *** a/src/backend/utils/adt/jsonfuncs.c --- b/src/backend/utils/adt/jsonfuncs.c *************** populate_record(TupleDesc tupdesc, *** 3071,3076 **** --- 3071,3077 ---- tuple.t_len = HeapTupleHeaderGetDatumLength(defaultval); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple); tuple.t_data = defaultval; /* Break down the tuple into fields */ *************** populate_recordset_record(PopulateRecord *** 3448,3453 **** --- 3449,3455 ---- tuple.t_len = HeapTupleHeaderGetDatumLength(tuphead); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple); tuple.t_data = tuphead; tuplestore_puttuple(state->tuple_store, &tuple); diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c new file mode 100644 index 9e0a8ab..2729e1d *** a/src/backend/utils/adt/lockfuncs.c --- 
b/src/backend/utils/adt/lockfuncs.c *************** VXIDGetDatum(BackendId bid, LocalTransac *** 67,75 **** * The representation is "/", decimal and unsigned decimal * respectively. Note that elog.c also knows how to format a vxid. */ ! char vxidstr[32]; ! snprintf(vxidstr, sizeof(vxidstr), "%d/%u", bid, lxid); return CStringGetTextDatum(vxidstr); } --- 67,75 ---- * The representation is "/", decimal and unsigned decimal * respectively. Note that elog.c also knows how to format a vxid. */ ! char vxidstr[64]; ! snprintf(vxidstr, sizeof(vxidstr), "%d/" XID_FMT, bid, lxid); return CStringGetTextDatum(vxidstr); } *************** pg_lock_status(PG_FUNCTION_ARGS) *** 269,275 **** break; case LOCKTAG_TRANSACTION: values[6] = ! TransactionIdGetDatum(instance->locktag.locktag_field1); nulls[1] = true; nulls[2] = true; nulls[3] = true; --- 269,277 ---- break; case LOCKTAG_TRANSACTION: values[6] = ! TransactionIdGetDatum( ! (TransactionId)instance->locktag.locktag_field1 | ! ((TransactionId)instance->locktag.locktag_field2 << 32)); nulls[1] = true; nulls[2] = true; nulls[3] = true; *************** pg_lock_status(PG_FUNCTION_ARGS) *** 281,287 **** break; case LOCKTAG_VIRTUALTRANSACTION: values[5] = VXIDGetDatum(instance->locktag.locktag_field1, ! instance->locktag.locktag_field2); nulls[1] = true; nulls[2] = true; nulls[3] = true; --- 283,290 ---- break; case LOCKTAG_VIRTUALTRANSACTION: values[5] = VXIDGetDatum(instance->locktag.locktag_field1, ! (TransactionId)instance->locktag.locktag_field2 | ! ((TransactionId)instance->locktag.locktag_field3 << 32)); nulls[1] = true; nulls[2] = true; nulls[3] = true; diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c new file mode 100644 index c2891e6..18de55d *** a/src/backend/utils/adt/ri_triggers.c --- b/src/backend/utils/adt/ri_triggers.c *************** RI_FKey_fk_upd_check_required(Trigger *t *** 2167,2173 **** * UPDATE check. 
(We could skip this if we knew the INSERT * trigger already fired, but there is no easy way to know that.) */ ! if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(old_row->t_data))) return true; /* If all old and new key values are equal, no check is needed */ --- 2167,2173 ---- * UPDATE check. (We could skip this if we knew the INSERT * trigger already fired, but there is no easy way to know that.) */ ! if (TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(old_row))) return true; /* If all old and new key values are equal, no check is needed */ *************** RI_FKey_fk_upd_check_required(Trigger *t *** 2205,2211 **** * UPDATE check. (We could skip this if we knew the INSERT * trigger already fired, but there is no easy way to know that.) */ ! if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(old_row->t_data))) return true; /* If all old and new key values are equal, no check is needed */ --- 2205,2211 ---- * UPDATE check. (We could skip this if we knew the INSERT * trigger already fired, but there is no easy way to know that.) */ ! 
if (TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(old_row))) return true; /* If all old and new key values are equal, no check is needed */ diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c new file mode 100644 index 98fe00f..80adc7a *** a/src/backend/utils/adt/rowtypes.c --- b/src/backend/utils/adt/rowtypes.c *************** record_out(PG_FUNCTION_ARGS) *** 325,330 **** --- 325,331 ---- tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple); tuple.t_data = rec; /* *************** record_send(PG_FUNCTION_ARGS) *** 672,677 **** --- 673,679 ---- tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple); tuple.t_data = rec; /* *************** record_cmp(FunctionCallInfo fcinfo) *** 822,831 **** --- 824,835 ---- tuple1.t_len = HeapTupleHeaderGetDatumLength(record1); ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple1); tuple1.t_data = record1; tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple2); tuple2.t_data = record2; /* *************** record_eq(PG_FUNCTION_ARGS) *** 1065,1074 **** --- 1069,1080 ---- ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + HeapTupleSetZeroBase(&tuple1); tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroBase(&tuple2); /* * We arrange to look up the needed comparison info just once per series *************** record_image_cmp(FunctionCallInfo fcinfo *** 1328,1337 **** --- 1334,1345 ---- ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + 
HeapTupleSetZeroBase(&tuple1); tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroBase(&tuple2); /* * We arrange to look up the needed comparison info just once per series *************** record_image_eq(PG_FUNCTION_ARGS) *** 1609,1618 **** --- 1617,1628 ---- ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + HeapTupleSetZeroBase(&tuple1); tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroBase(&tuple2); /* * We arrange to look up the needed comparison info just once per series diff --git a/src/backend/utils/adt/txid.c b/src/backend/utils/adt/txid.c new file mode 100644 index 1e38ca2..18106e5 *** a/src/backend/utils/adt/txid.c --- b/src/backend/utils/adt/txid.c *************** *** 38,48 **** /* txid will be signed int8 in database, so must limit to 63 bits */ #define MAX_TXID ((uint64) PG_INT64_MAX) - /* Use unsigned variant internally */ - typedef uint64 txid; - - /* sprintf format code for uint64 */ - #define TXID_FMT UINT64_FORMAT /* * If defined, use bsearch() function for searching for txids in snapshots --- 38,43 ---- *************** typedef struct *** 61,186 **** * * Explicit embedding is ok as we want always correct alignment anyway. */ ! int32 __varsz; ! uint32 nxip; /* number of txids in xip array */ ! txid xmin; ! txid xmax; /* in-progress txids, xmin <= xip[i] < xmax: */ ! txid xip[FLEXIBLE_ARRAY_MEMBER]; } TxidSnapshot; #define TXID_SNAPSHOT_SIZE(nxip) \ ! (offsetof(TxidSnapshot, xip) + sizeof(txid) * (nxip)) #define TXID_SNAPSHOT_MAX_NXIP \ ! ((MaxAllocSize - offsetof(TxidSnapshot, xip)) / sizeof(txid)) ! ! /* ! * Epoch values from xact.c ! */ ! typedef struct ! { ! TransactionId last_xid; ! uint32 epoch; ! } TxidEpoch; ! ! ! /* ! * Fetch epoch data from xact.c. ! 
*/ ! static void ! load_xid_epoch(TxidEpoch *state) ! { ! GetNextXidAndEpoch(&state->last_xid, &state->epoch); ! } ! ! /* ! * Helper to get a TransactionId from a 64-bit xid with wraparound detection. ! * ! * It is an ERROR if the xid is in the future. Otherwise, returns true if ! * the transaction is still new enough that we can determine whether it ! * committed and false otherwise. If *extracted_xid is not NULL, it is set ! * to the low 32 bits of the transaction ID (i.e. the actual XID, without the ! * epoch). ! * ! * The caller must hold CLogTruncationLock since it's dealing with arbitrary ! * XIDs, and must continue to hold it until it's done with any clog lookups ! * relating to those XIDs. ! */ ! static bool ! TransactionIdInRecentPast(uint64 xid_with_epoch, TransactionId *extracted_xid) ! { ! uint32 xid_epoch = (uint32) (xid_with_epoch >> 32); ! TransactionId xid = (TransactionId) xid_with_epoch; ! uint32 now_epoch; ! TransactionId now_epoch_last_xid; ! ! GetNextXidAndEpoch(&now_epoch_last_xid, &now_epoch); ! ! if (extracted_xid != NULL) ! *extracted_xid = xid; ! ! if (!TransactionIdIsValid(xid)) ! return false; ! ! /* For non-normal transaction IDs, we can ignore the epoch. */ ! if (!TransactionIdIsNormal(xid)) ! return true; ! ! /* If the transaction ID is in the future, throw an error. */ ! if (xid_epoch > now_epoch ! || (xid_epoch == now_epoch && xid > now_epoch_last_xid)) ! ereport(ERROR, ! (errcode(ERRCODE_INVALID_PARAMETER_VALUE), ! errmsg("transaction ID %s is in the future", ! psprintf(UINT64_FORMAT, xid_with_epoch)))); ! ! /* ! * ShmemVariableCache->oldestClogXid is protected by CLogTruncationLock, ! * but we don't acquire that lock here. Instead, we require the caller to ! * acquire it, because the caller is presumably going to look up the ! * returned XID. If we took and released the lock within this function, a ! * CLOG truncation could occur before the caller finished with the XID. ! */ ! Assert(LWLockHeldByMe(CLogTruncationLock)); ! ! /* ! 
* If the transaction ID has wrapped around, it's definitely too old to ! * determine the commit status. Otherwise, we can compare it to ! * ShmemVariableCache->oldestClogXid to determine whether the relevant ! * CLOG entry is guaranteed to still exist. ! */ ! if (xid_epoch + 1 < now_epoch ! || (xid_epoch + 1 == now_epoch && xid < now_epoch_last_xid) ! || TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid)) ! return false; ! ! return true; ! } ! ! /* ! * do a TransactionId -> txid conversion for an XID near the given epoch ! */ ! static txid ! convert_xid(TransactionId xid, const TxidEpoch *state) ! { ! uint64 epoch; ! ! /* return special xid's as-is */ ! if (!TransactionIdIsNormal(xid)) ! return (txid) xid; ! ! /* xid can be on either side when near wrap-around */ ! epoch = (uint64) state->epoch; ! if (xid > state->last_xid && ! TransactionIdPrecedes(xid, state->last_xid)) ! epoch--; ! else if (xid < state->last_xid && ! TransactionIdFollows(xid, state->last_xid)) ! epoch++; ! ! return (epoch << 32) | xid; ! } /* * txid comparator for qsort/bsearch --- 56,74 ---- * * Explicit embedding is ok as we want always correct alignment anyway. */ ! int32 __varsz; ! uint32 nxip; /* number of txids in xip array */ ! TransactionId xmin; ! TransactionId xmax; /* in-progress txids, xmin <= xip[i] < xmax: */ ! TransactionId xip[FLEXIBLE_ARRAY_MEMBER]; } TxidSnapshot; #define TXID_SNAPSHOT_SIZE(nxip) \ ! (offsetof(TxidSnapshot, xip) + sizeof(TransactionId) * (nxip)) #define TXID_SNAPSHOT_MAX_NXIP \ ! ((MaxAllocSize - offsetof(TxidSnapshot, xip)) / sizeof(TransactionId)) /* * txid comparator for qsort/bsearch *************** convert_xid(TransactionId xid, const Txi *** 188,195 **** static int cmp_txid(const void *aa, const void *bb) { ! txid a = *(const txid *) aa; ! txid b = *(const txid *) bb; if (a < b) return -1; --- 76,83 ---- static int cmp_txid(const void *aa, const void *bb) { ! TransactionId a = *(const TransactionId *) aa; ! 
TransactionId b = *(const TransactionId *) bb; if (a < b) return -1; *************** cmp_txid(const void *aa, const void *bb) *** 208,221 **** static void sort_snapshot(TxidSnapshot *snap) { ! txid last = 0; ! int nxip, ! idx1, ! idx2; if (snap->nxip > 1) { ! qsort(snap->xip, snap->nxip, sizeof(txid), cmp_txid); /* remove duplicates */ nxip = snap->nxip; --- 96,109 ---- static void sort_snapshot(TxidSnapshot *snap) { ! TransactionId last = 0; ! int nxip, ! idx1, ! idx2; if (snap->nxip > 1) { ! qsort(snap->xip, snap->nxip, sizeof(TransactionId), cmp_txid); /* remove duplicates */ nxip = snap->nxip; *************** sort_snapshot(TxidSnapshot *snap) *** 235,241 **** * check txid visibility. */ static bool ! is_visible_txid(txid value, const TxidSnapshot *snap) { if (value < snap->xmin) return true; --- 123,129 ---- * check txid visibility. */ static bool ! is_visible_txid(TransactionId value, const TxidSnapshot *snap) { if (value < snap->xmin) return true; *************** is_visible_txid(txid value, const TxidSn *** 246,252 **** { void *res; ! res = bsearch(&value, snap->xip, snap->nxip, sizeof(txid), cmp_txid); /* if found, transaction is still in progress */ return (res) ? false : true; } --- 134,140 ---- { void *res; ! res = bsearch(&value, snap->xip, snap->nxip, sizeof(TransactionId), cmp_txid); /* if found, transaction is still in progress */ return (res) ? false : true; } *************** is_visible_txid(txid value, const TxidSn *** 269,275 **** */ static StringInfo ! buf_init(txid xmin, txid xmax) { TxidSnapshot snap; StringInfo buf; --- 157,163 ---- */ static StringInfo ! buf_init(TransactionId xmin, TransactionId xmax) { TxidSnapshot snap; StringInfo buf; *************** buf_init(txid xmin, txid xmax) *** 284,290 **** } static void ! buf_add_txid(StringInfo buf, txid xid) { TxidSnapshot *snap = (TxidSnapshot *) buf->data; --- 172,178 ---- } static void ! 
buf_add_txid(StringInfo buf, TransactionId xid) { TxidSnapshot *snap = (TxidSnapshot *) buf->data; *************** buf_finalize(StringInfo buf) *** 311,324 **** /* * simple number parser. * ! * We return 0 on error, which is invalid value for txid. */ ! static txid str2txid(const char *s, const char **endp) { ! txid val = 0; ! txid cutoff = MAX_TXID / 10; ! txid cutlim = MAX_TXID % 10; for (; *s; s++) { --- 199,212 ---- /* * simple number parser. * ! * We return 0 on error, which is invalid value for TransactionId. */ ! static TransactionId str2txid(const char *s, const char **endp) { ! TransactionId val = 0; ! TransactionId cutoff = MAX_TXID / 10; ! TransactionId cutlim = MAX_TXID % 10; for (; *s; s++) { *************** str2txid(const char *s, const char **end *** 350,362 **** static TxidSnapshot * parse_snapshot(const char *str) { ! txid xmin; ! txid xmax; ! txid last_val = 0, ! val; ! const char *str_start = str; ! const char *endp; ! StringInfo buf; xmin = str2txid(str, &endp); if (*endp != ':') --- 238,250 ---- static TxidSnapshot * parse_snapshot(const char *str) { ! TransactionId xmin; ! TransactionId xmax; ! TransactionId last_val = 0, ! val; ! const char *str_start = str; ! const char *endp; ! StringInfo buf; xmin = str2txid(str, &endp); if (*endp != ':') *************** bad_format: *** 418,448 **** /* * txid_current() returns int8 * ! * Return the current toplevel transaction ID as TXID ! * If the current transaction does not have one, one is assigned. ! * ! * This value has the epoch as the high 32 bits and the 32-bit xid ! * as the low 32 bits. */ Datum txid_current(PG_FUNCTION_ARGS) { ! txid val; ! TxidEpoch state; ! ! /* ! * Must prevent during recovery because if an xid is not assigned we try ! * to assign one, which would fail. Programs already rely on this function ! * to always return a valid current xid, so we should not change this to ! * return NULL or similar invalid xid. ! */ ! PreventCommandDuringRecovery("txid_current()"); ! ! 
load_xid_epoch(&state); ! ! val = convert_xid(GetTopTransactionId(), &state); ! ! PG_RETURN_INT64(val); } /* --- 306,317 ---- /* * txid_current() returns int8 * ! * Return the current toplevel transaction ID */ Datum txid_current(PG_FUNCTION_ARGS) { ! PG_RETURN_INT64(GetTopTransactionId()); } /* *************** txid_current(PG_FUNCTION_ARGS) *** 452,469 **** Datum txid_current_if_assigned(PG_FUNCTION_ARGS) { ! txid val; ! TxidEpoch state; ! TransactionId topxid = GetTopTransactionIdIfAny(); ! if (topxid == InvalidTransactionId) PG_RETURN_NULL(); - - load_xid_epoch(&state); - - val = convert_xid(topxid, &state); - - PG_RETURN_INT64(val); } /* --- 321,332 ---- Datum txid_current_if_assigned(PG_FUNCTION_ARGS) { ! TransactionId xid = GetTopTransactionIdIfAny(); ! if (TransactionIdIsValid(xid)) ! PG_RETURN_INT64(xid); ! else PG_RETURN_NULL(); } /* *************** txid_current_snapshot(PG_FUNCTION_ARGS) *** 479,493 **** TxidSnapshot *snap; uint32 nxip, i; - TxidEpoch state; Snapshot cur; cur = GetActiveSnapshot(); if (cur == NULL) elog(ERROR, "no active snapshot set"); - load_xid_epoch(&state); - /* * Compile-time limits on the procarray (MAX_BACKENDS processes plus * MAX_BACKENDS prepared transactions) guarantee nxip won't be too large. --- 342,353 ---- *************** txid_current_snapshot(PG_FUNCTION_ARGS) *** 500,510 **** snap = palloc(TXID_SNAPSHOT_SIZE(nxip)); /* fill */ ! snap->xmin = convert_xid(cur->xmin, &state); ! snap->xmax = convert_xid(cur->xmax, &state); snap->nxip = nxip; for (i = 0; i < nxip; i++) ! snap->xip[i] = convert_xid(cur->xip[i], &state); /* * We want them guaranteed to be in ascending order. This also removes --- 360,370 ---- snap = palloc(TXID_SNAPSHOT_SIZE(nxip)); /* fill */ ! snap->xmin = cur->xmin; ! snap->xmax = cur->xmax; snap->nxip = nxip; for (i = 0; i < nxip; i++) ! snap->xip[i] = cur->xip[i]; /* * We want them guaranteed to be in ascending order. 
This also removes *************** txid_snapshot_out(PG_FUNCTION_ARGS) *** 551,564 **** initStringInfo(&str); ! appendStringInfo(&str, TXID_FMT ":", snap->xmin); ! appendStringInfo(&str, TXID_FMT ":", snap->xmax); for (i = 0; i < snap->nxip; i++) { if (i > 0) appendStringInfoChar(&str, ','); ! appendStringInfo(&str, TXID_FMT, snap->xip[i]); } PG_RETURN_CSTRING(str.data); --- 411,424 ---- initStringInfo(&str); ! appendStringInfo(&str, XID_FMT ":", snap->xmin); ! appendStringInfo(&str, XID_FMT ":", snap->xmax); for (i = 0; i < snap->nxip; i++) { if (i > 0) appendStringInfoChar(&str, ','); ! appendStringInfo(&str, XID_FMT, snap->xip[i]); } PG_RETURN_CSTRING(str.data); *************** txid_snapshot_out(PG_FUNCTION_ARGS) *** 574,586 **** Datum txid_snapshot_recv(PG_FUNCTION_ARGS) { ! StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); ! TxidSnapshot *snap; ! txid last = 0; ! int nxip; ! int i; ! txid xmin, ! xmax; /* load and validate nxip */ nxip = pq_getmsgint(buf, 4); --- 434,446 ---- Datum txid_snapshot_recv(PG_FUNCTION_ARGS) { ! StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); ! TxidSnapshot *snap; ! TransactionId last = 0; ! int nxip; ! int i; ! TransactionId xmin, ! xmax; /* load and validate nxip */ nxip = pq_getmsgint(buf, 4); *************** txid_snapshot_recv(PG_FUNCTION_ARGS) *** 598,604 **** for (i = 0; i < nxip; i++) { ! txid cur = pq_getmsgint64(buf); if (cur < last || cur < xmin || cur >= xmax) goto bad_format; --- 458,464 ---- for (i = 0; i < nxip; i++) { ! TransactionId cur = pq_getmsgint64(buf); if (cur < last || cur < xmin || cur >= xmax) goto bad_format; *************** txid_snapshot_send(PG_FUNCTION_ARGS) *** 656,663 **** Datum txid_visible_in_snapshot(PG_FUNCTION_ARGS) { ! txid value = PG_GETARG_INT64(0); ! TxidSnapshot *snap = (TxidSnapshot *) PG_GETARG_VARLENA_P(1); PG_RETURN_BOOL(is_visible_txid(value, snap)); } --- 516,523 ---- Datum txid_visible_in_snapshot(PG_FUNCTION_ARGS) { ! TransactionId value = PG_GETARG_INT64(0); ! 
TxidSnapshot *snap = (TxidSnapshot *) PG_GETARG_VARLENA_P(1); PG_RETURN_BOOL(is_visible_txid(value, snap)); } *************** Datum *** 697,704 **** txid_snapshot_xip(PG_FUNCTION_ARGS) { FuncCallContext *fctx; ! TxidSnapshot *snap; ! txid value; /* on first call initialize snap_state and get copy of snapshot */ if (SRF_IS_FIRSTCALL()) --- 557,564 ---- txid_snapshot_xip(PG_FUNCTION_ARGS) { FuncCallContext *fctx; ! TxidSnapshot *snap; ! TransactionId value; /* on first call initialize snap_state and get copy of snapshot */ if (SRF_IS_FIRSTCALL()) *************** txid_snapshot_xip(PG_FUNCTION_ARGS) *** 732,739 **** * Report the status of a recent transaction ID, or null for wrapped, * truncated away or otherwise too old XIDs. * ! * The passed epoch-qualified xid is treated as a normal xid, not a ! * multixact id. * * If it points to a committed subxact the result is the subxact status even * though the parent xact may still be in progress or may have aborted. --- 592,598 ---- * Report the status of a recent transaction ID, or null for wrapped, * truncated away or otherwise too old XIDs. * ! * The passed xid is treated as a normal xid, not a multixact id. * * If it points to a committed subxact the result is the subxact status even * though the parent xact may still be in progress or may have aborted. *************** Datum *** 742,759 **** txid_status(PG_FUNCTION_ARGS) { const char *status; ! uint64 xid_with_epoch = PG_GETARG_INT64(0); ! TransactionId xid; /* * We must protect against concurrent truncation of clog entries to avoid * an I/O error on SLRU lookup. */ LWLockAcquire(CLogTruncationLock, LW_SHARED); ! if (TransactionIdInRecentPast(xid_with_epoch, &xid)) ! { ! Assert(TransactionIdIsValid(xid)); if (TransactionIdIsCurrentTransactionId(xid)) status = "in progress"; else if (TransactionIdDidCommit(xid)) --- 601,629 ---- txid_status(PG_FUNCTION_ARGS) { const char *status; ! 
TransactionId xid = PG_GETARG_TRANSACTIONID(0); /* * We must protect against concurrent truncation of clog entries to avoid * an I/O error on SLRU lookup. */ LWLockAcquire(CLogTruncationLock, LW_SHARED); ! Assert(TransactionIdIsValid(xid)); + if (TransactionIdIsNormal(xid) && + TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid)) + { + status = NULL; + } + else if (TransactionIdPrecedesOrEquals(ReadNewTransactionId(), xid)) + { + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("transaction ID " XID_FMT " is in the future", + xid))); + } + else + { if (TransactionIdIsCurrentTransactionId(xid)) status = "in progress"; else if (TransactionIdDidCommit(xid)) *************** txid_status(PG_FUNCTION_ARGS) *** 779,788 **** status = "in progress"; } } - else - { - status = NULL; - } LWLockRelease(CLogTruncationLock); if (status == NULL) --- 649,654 ---- diff --git a/src/backend/utils/adt/xid.c b/src/backend/utils/adt/xid.c new file mode 100644 index 2051709..6d327fb *** a/src/backend/utils/adt/xid.c --- b/src/backend/utils/adt/xid.c *************** *** 22,30 **** #include "libpq/pqformat.h" #include "utils/builtins.h" - #define PG_GETARG_TRANSACTIONID(n) DatumGetTransactionId(PG_GETARG_DATUM(n)) - #define PG_RETURN_TRANSACTIONID(x) return TransactionIdGetDatum(x) - #define PG_GETARG_COMMANDID(n) DatumGetCommandId(PG_GETARG_DATUM(n)) #define PG_RETURN_COMMANDID(x) return CommandIdGetDatum(x) --- 22,27 ---- *************** xidin(PG_FUNCTION_ARGS) *** 34,49 **** { char *str = PG_GETARG_CSTRING(0); ! PG_RETURN_TRANSACTIONID((TransactionId) strtoul(str, NULL, 0)); } Datum xidout(PG_FUNCTION_ARGS) { TransactionId transactionId = PG_GETARG_TRANSACTIONID(0); ! char *result = (char *) palloc(16); ! snprintf(result, 16, "%lu", (unsigned long) transactionId); PG_RETURN_CSTRING(result); } --- 31,46 ---- { char *str = PG_GETARG_CSTRING(0); ! 
PG_RETURN_TRANSACTIONID((TransactionId) pg_strtouint64(str, NULL, 0)); } Datum xidout(PG_FUNCTION_ARGS) { TransactionId transactionId = PG_GETARG_TRANSACTIONID(0); ! char *result = (char *) palloc(32); ! snprintf(result, 32, XID_FMT, transactionId); PG_RETURN_CSTRING(result); } *************** Datum *** 54,61 **** xidrecv(PG_FUNCTION_ARGS) { StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); ! PG_RETURN_TRANSACTIONID((TransactionId) pq_getmsgint(buf, sizeof(TransactionId))); } /* --- 51,62 ---- xidrecv(PG_FUNCTION_ARGS) { StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + uint32 lo, hi; ! lo = (uint32) pq_getmsgint(buf, sizeof(TransactionId)); ! hi = (uint32) pq_getmsgint(buf, sizeof(TransactionId)); ! ! PG_RETURN_TRANSACTIONID((uint64) lo + ((uint64) hi << 32)); } /* *************** xidsend(PG_FUNCTION_ARGS) *** 66,74 **** { TransactionId arg1 = PG_GETARG_TRANSACTIONID(0); StringInfoData buf; pq_begintypsend(&buf); ! pq_sendint(&buf, arg1, sizeof(arg1)); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } --- 67,80 ---- { TransactionId arg1 = PG_GETARG_TRANSACTIONID(0); StringInfoData buf; + uint32 lo, hi; + + lo = (uint32) (arg1 & 0xFFFFFFFF); + hi = (uint32) (arg1 >> 32); pq_begintypsend(&buf); ! pq_sendint(&buf, lo, sizeof(lo)); ! pq_sendint(&buf, hi, sizeof(hi)); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c new file mode 100644 index b8e3780..b7e1c8e *** a/src/backend/utils/cache/relcache.c --- b/src/backend/utils/cache/relcache.c *************** RelationReloadIndexInfo(Relation relatio *** 2292,2299 **** relation->rd_index->indislive = index->indislive; /* Copy xmin too, as that is needed to make sense of indcheckxmin */ ! HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data, ! HeapTupleHeaderGetXmin(tuple->t_data)); ReleaseSysCache(tuple); } --- 2292,2300 ---- relation->rd_index->indislive = index->indislive; /* Copy xmin too, as that is needed to make sense of indcheckxmin */ ! 
relation->rd_indextuple->t_xid_base = tuple->t_xid_base; ! HeapTupleSetXmin(relation->rd_indextuple, ! HeapTupleGetXmin(tuple)); ReleaseSysCache(tuple); } diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c new file mode 100644 index 977c038..712b94f *** a/src/backend/utils/error/elog.c --- b/src/backend/utils/error/elog.c *************** log_line_prefix(StringInfo buf, ErrorDat *** 2584,2595 **** { char strfbuf[128]; ! snprintf(strfbuf, sizeof(strfbuf) - 1, "%d/%u", MyProc->backendId, MyProc->lxid); appendStringInfo(buf, "%*s", padding, strfbuf); } else ! appendStringInfo(buf, "%d/%u", MyProc->backendId, MyProc->lxid); } else if (padding != 0) appendStringInfoSpaces(buf, --- 2584,2595 ---- { char strfbuf[128]; ! snprintf(strfbuf, sizeof(strfbuf) - 1, "%d/" XID_FMT, MyProc->backendId, MyProc->lxid); appendStringInfo(buf, "%*s", padding, strfbuf); } else ! appendStringInfo(buf, "%d/" XID_FMT, MyProc->backendId, MyProc->lxid); } else if (padding != 0) appendStringInfoSpaces(buf, *************** log_line_prefix(StringInfo buf, ErrorDat *** 2597,2605 **** break; case 'x': if (padding != 0) ! appendStringInfo(buf, "%*u", padding, GetTopTransactionIdIfAny()); else ! appendStringInfo(buf, "%u", GetTopTransactionIdIfAny()); break; case 'e': if (padding != 0) --- 2597,2605 ---- break; case 'x': if (padding != 0) ! appendStringInfo(buf, "%*" INT64_MODIFIER "u", padding, GetTopTransactionIdIfAny()); else ! appendStringInfo(buf, XID_FMT, GetTopTransactionIdIfAny()); break; case 'e': if (padding != 0) *************** write_csvlog(ErrorData *edata) *** 2746,2756 **** /* Virtual transaction id */ /* keep VXID format in sync with lockfuncs.c */ if (MyProc != NULL && MyProc->backendId != InvalidBackendId) ! appendStringInfo(&buf, "%d/%u", MyProc->backendId, MyProc->lxid); appendStringInfoChar(&buf, ','); /* Transaction id */ ! 
appendStringInfo(&buf, "%u", GetTopTransactionIdIfAny()); appendStringInfoChar(&buf, ','); /* Error severity */ --- 2746,2756 ---- /* Virtual transaction id */ /* keep VXID format in sync with lockfuncs.c */ if (MyProc != NULL && MyProc->backendId != InvalidBackendId) ! appendStringInfo(&buf, "%d/" XID_FMT, MyProc->backendId, MyProc->lxid); appendStringInfoChar(&buf, ','); /* Transaction id */ ! appendStringInfo(&buf, XID_FMT, GetTopTransactionIdIfAny()); appendStringInfoChar(&buf, ','); /* Error severity */ diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c new file mode 100644 index a7b0782..f6ef322 *** a/src/backend/utils/fmgr/fmgr.c --- b/src/backend/utils/fmgr/fmgr.c *************** lookup_C_func(HeapTuple procedureTuple) *** 455,461 **** NULL); if (entry == NULL) return NULL; /* no such entry */ ! if (entry->fn_xmin == HeapTupleHeaderGetRawXmin(procedureTuple->t_data) && ItemPointerEquals(&entry->fn_tid, &procedureTuple->t_self)) return entry; /* OK */ return NULL; /* entry is out of date */ --- 455,461 ---- NULL); if (entry == NULL) return NULL; /* no such entry */ ! if (entry->fn_xmin == HeapTupleGetRawXmin(procedureTuple) && ItemPointerEquals(&entry->fn_tid, &procedureTuple->t_self)) return entry; /* OK */ return NULL; /* entry is out of date */ *************** record_C_func(HeapTuple procedureTuple, *** 492,498 **** HASH_ENTER, &found); /* OID is already filled in */ ! entry->fn_xmin = HeapTupleHeaderGetRawXmin(procedureTuple->t_data); entry->fn_tid = procedureTuple->t_self; entry->user_fn = user_fn; entry->inforec = inforec; --- 492,498 ---- HASH_ENTER, &found); /* OID is already filled in */ ! 
entry->fn_xmin = HeapTupleGetRawXmin(procedureTuple); entry->fn_tid = procedureTuple->t_self; entry->user_fn = user_fn; entry->inforec = inforec; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c new file mode 100644 index 1226f67..a1692cf *** a/src/backend/utils/misc/guc.c --- b/src/backend/utils/misc/guc.c *************** static struct config_int ConfigureNamesI *** 2128,2183 **** NULL, NULL, NULL }, - { - {"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Minimum age at which VACUUM should freeze a table row."), - NULL - }, - &vacuum_freeze_min_age, - 50000000, 0, 1000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Age at which VACUUM should scan whole table to freeze tuples."), - NULL - }, - &vacuum_freeze_table_age, - 150000000, 0, 2000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_multixact_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Minimum age at which VACUUM should freeze a MultiXactId in a table row."), - NULL - }, - &vacuum_multixact_freeze_min_age, - 5000000, 0, 1000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_multixact_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Multixact age at which VACUUM should scan whole table to freeze tuples."), - NULL - }, - &vacuum_multixact_freeze_table_age, - 150000000, 0, 2000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_defer_cleanup_age", PGC_SIGHUP, REPLICATION_MASTER, - gettext_noop("Number of transactions by which VACUUM and HOT cleanup should be deferred, if any."), - NULL - }, - &vacuum_defer_cleanup_age, - 0, 0, 1000000, - NULL, NULL, NULL - }, - /* * See also CheckRequiredParameterValues() if this parameter changes */ --- 2128,2133 ---- *************** static struct config_int ConfigureNamesI *** 2680,2706 **** NULL, NULL, NULL }, { - /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ - {"autovacuum_freeze_max_age", 
PGC_POSTMASTER, AUTOVACUUM, - gettext_noop("Age at which to autovacuum a table to prevent transaction ID wraparound."), - NULL - }, - &autovacuum_freeze_max_age, - /* see pg_resetwal if you change the upper-limit value */ - 200000000, 100000, 2000000000, - NULL, NULL, NULL - }, - { - /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ - {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, - gettext_noop("Multixact age at which to autovacuum a table to prevent multixact wraparound."), - NULL - }, - &autovacuum_multixact_freeze_max_age, - 400000000, 10000, 2000000000, - NULL, NULL, NULL - }, - { /* see max_connections */ {"autovacuum_max_workers", PGC_POSTMASTER, AUTOVACUUM, gettext_noop("Sets the maximum number of simultaneously running autovacuum worker processes."), --- 2630,2635 ---- *************** static struct config_real ConfigureNames *** 3059,3064 **** --- 2988,3065 ---- static struct config_int64 ConfigureNamesInt64[] = { + { + {"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Minimum age at which VACUUM should freeze a table row."), + NULL + }, + &vacuum_freeze_min_age, + INT64CONST(50000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Age at which VACUUM should scan whole table to freeze tuples."), + NULL + }, + &vacuum_freeze_table_age, + INT64CONST(150000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_multixact_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Minimum age at which VACUUM should freeze a MultiXactId in a table row."), + NULL + }, + &vacuum_multixact_freeze_min_age, + INT64CONST(5000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_multixact_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Multixact age at which VACUUM should scan whole 
table to freeze tuples."), + NULL + }, + &vacuum_multixact_freeze_table_age, + INT64CONST(150000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_defer_cleanup_age", PGC_SIGHUP, REPLICATION_MASTER, + gettext_noop("Number of transactions by which VACUUM and HOT cleanup should be deferred, if any."), + NULL + }, + &vacuum_defer_cleanup_age, + INT64CONST(0), INT64CONST(0), INT64CONST(1000000), + NULL, NULL, NULL + }, + + { + /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ + {"autovacuum_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, + gettext_noop("Age at which to autovacuum a table to prevent transaction ID wraparound."), + NULL + }, + &autovacuum_freeze_max_age, + /* see pg_resetxlog if you change the upper-limit value */ + INT64CONST(10000000000), INT64CONST(100000), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + { + /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ + {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, + gettext_noop("Multixact age at which to autovacuum a table to prevent multixact wraparound."), + NULL + }, + &autovacuum_multixact_freeze_max_age, + INT64CONST(20000000000), INT64CONST(10000), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0.0, 0.0, 0.0, NULL, NULL, NULL diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c new file mode 100644 index 0dbfe7f..840af09 *** a/src/backend/utils/misc/pg_controldata.c --- b/src/backend/utils/misc/pg_controldata.c *************** pg_control_checkpoint(PG_FUNCTION_ARGS) *** 166,173 **** values[6] = BoolGetDatum(ControlFile->checkPointCopy.fullPageWrites); nulls[6] = false; ! values[7] = CStringGetTextDatum(psprintf("%u:%u", ! 
ControlFile->checkPointCopy.nextXidEpoch, ControlFile->checkPointCopy.nextXid)); nulls[7] = false; --- 166,172 ---- values[6] = BoolGetDatum(ControlFile->checkPointCopy.fullPageWrites); nulls[6] = false; ! values[7] = CStringGetTextDatum(psprintf(XID_FMT, ControlFile->checkPointCopy.nextXid)); nulls[7] = false; diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c new file mode 100644 index 17e1b68..163f09d *** a/src/backend/utils/sort/tuplesort.c --- b/src/backend/utils/sort/tuplesort.c *************** readtup_cluster(Tuplesortstate *state, S *** 4052,4057 **** --- 4052,4058 ---- &tuple->t_self, sizeof(ItemPointerData)); /* We don't currently bother to reconstruct t_tableOid */ tuple->t_tableOid = InvalidOid; + HeapTupleSetZeroBase(tuple); /* Read in the tuple body */ LogicalTapeReadExact(state->tapeset, tapenum, tuple->t_data, tuple->t_len); diff --git a/src/backend/utils/time/combocid.c b/src/backend/utils/time/combocid.c new file mode 100644 index c7e4331..a84a6cd *** a/src/backend/utils/time/combocid.c --- b/src/backend/utils/time/combocid.c *************** HeapTupleHeaderGetCmin(HeapTupleHeader t *** 107,113 **** CommandId cid = HeapTupleHeaderGetRawCommandId(tup); Assert(!(tup->t_infomask & HEAP_MOVED)); ! Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tup))); if (tup->t_infomask & HEAP_COMBOCID) return GetRealCmin(cid); --- 107,114 ---- CommandId cid = HeapTupleHeaderGetRawCommandId(tup); Assert(!(tup->t_infomask & HEAP_MOVED)); ! /* FIXME */ ! /*Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tup)));*/ if (tup->t_infomask & HEAP_COMBOCID) return GetRealCmin(cid); *************** HeapTupleHeaderGetCmax(HeapTupleHeader t *** 128,135 **** * weakens the check, but not using GetCmax() inside one would complicate * things too much. */ ! Assert(CritSectionCount > 0 || ! 
TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tup))); if (tup->t_infomask & HEAP_COMBOCID) return GetRealCmax(cid); --- 129,137 ---- * weakens the check, but not using GetCmax() inside one would complicate * things too much. */ ! /* FIXME*/ ! /*Assert(CritSectionCount > 0 || ! TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tup)));*/ if (tup->t_infomask & HEAP_COMBOCID) return GetRealCmax(cid); *************** HeapTupleHeaderGetCmax(HeapTupleHeader t *** 151,157 **** * changes the tuple in shared buffers. */ void ! HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, CommandId *cmax, bool *iscombo) { --- 153,159 ---- * changes the tuple in shared buffers. */ void ! HeapTupleHeaderAdjustCmax(HeapTuple tup, CommandId *cmax, bool *iscombo) { *************** HeapTupleHeaderAdjustCmax(HeapTupleHeade *** 161,170 **** * Test for HeapTupleHeaderXminCommitted() first, because it's cheaper * than a TransactionIdIsCurrentTransactionId call. */ ! if (!HeapTupleHeaderXminCommitted(tup) && ! TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tup))) { ! CommandId cmin = HeapTupleHeaderGetCmin(tup); *cmax = GetComboCommandId(cmin, *cmax); *iscombo = true; --- 163,172 ---- * Test for HeapTupleHeaderXminCommitted() first, because it's cheaper * than a TransactionIdIsCurrentTransactionId call. */ ! if (!HeapTupleHeaderXminCommitted(tup->t_data) && ! TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(tup))) { ! CommandId cmin = HeapTupleHeaderGetCmin(tup->t_data); *cmax = GetComboCommandId(cmin, *cmax); *iscombo = true; diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c new file mode 100644 index 294ab70..3727be7 *** a/src/backend/utils/time/snapmgr.c --- b/src/backend/utils/time/snapmgr.c *************** ExportSnapshot(Snapshot snapshot) *** 1210,1217 **** * Generate file path for the snapshot. We start numbering of snapshots * inside the transaction from 1. */ ! 
snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d", ! MyProc->backendId, MyProc->lxid, list_length(exportedSnapshots) + 1); /* * Copy the snapshot into TopTransactionContext, add it to the --- 1210,1218 ---- * Generate file path for the snapshot. We start numbering of snapshots * inside the transaction from 1. */ ! snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X%08X-%d", ! MyProc->backendId, (uint32) (MyProc->lxid >> 32), ! (uint32) MyProc->lxid, list_length(exportedSnapshots) + 1); /* * Copy the snapshot into TopTransactionContext, add it to the *************** ExportSnapshot(Snapshot snapshot) *** 1238,1251 **** */ initStringInfo(&buf); ! appendStringInfo(&buf, "vxid:%d/%u\n", MyProc->backendId, MyProc->lxid); appendStringInfo(&buf, "pid:%d\n", MyProcPid); appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId); appendStringInfo(&buf, "iso:%d\n", XactIsoLevel); appendStringInfo(&buf, "ro:%d\n", XactReadOnly); ! appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin); ! appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax); /* * We must include our own top transaction ID in the top-xid data, since --- 1239,1252 ---- */ initStringInfo(&buf); ! appendStringInfo(&buf, "vxid:%d/" XID_FMT "\n", MyProc->backendId, MyProc->lxid); appendStringInfo(&buf, "pid:%d\n", MyProcPid); appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId); appendStringInfo(&buf, "iso:%d\n", XactIsoLevel); appendStringInfo(&buf, "ro:%d\n", XactReadOnly); ! appendStringInfo(&buf, "xmin:" XID_FMT "\n", snapshot->xmin); ! appendStringInfo(&buf, "xmax:" XID_FMT "\n", snapshot->xmax); /* * We must include our own top transaction ID in the top-xid data, since *************** ExportSnapshot(Snapshot snapshot) *** 1262,1270 **** TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0; appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid); for (i = 0; i < snapshot->xcnt; i++) ! appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]); if (addTopXid) ! 
appendStringInfo(&buf, "xip:%u\n", topXid); /* * Similarly, we add our subcommitted child XIDs to the subxid data. Here, --- 1263,1271 ---- TransactionIdPrecedes(topXid, snapshot->xmax)) ? 1 : 0; appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid); for (i = 0; i < snapshot->xcnt; i++) ! appendStringInfo(&buf, "xip:" XID_FMT "\n", snapshot->xip[i]); if (addTopXid) ! appendStringInfo(&buf, "xip:" XID_FMT "\n", topXid); /* * Similarly, we add our subcommitted child XIDs to the subxid data. Here, *************** ExportSnapshot(Snapshot snapshot) *** 1278,1286 **** appendStringInfoString(&buf, "sof:0\n"); appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren); for (i = 0; i < snapshot->subxcnt; i++) ! appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]); for (i = 0; i < nchildren; i++) ! appendStringInfo(&buf, "sxp:%u\n", children[i]); } appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery); --- 1279,1287 ---- appendStringInfoString(&buf, "sof:0\n"); appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren); for (i = 0; i < snapshot->subxcnt; i++) ! appendStringInfo(&buf, "sxp:" XID_FMT "\n", snapshot->subxip[i]); for (i = 0; i < nchildren; i++) ! appendStringInfo(&buf, "sxp:" XID_FMT "\n", children[i]); } appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery); *************** parseXidFromText(const char *prefix, cha *** 1383,1389 **** (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); ptr += prefixlen; ! if (sscanf(ptr, "%u", &val) != 1) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); --- 1384,1390 ---- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); ptr += prefixlen; ! 
if (sscanf(ptr, XID_FMT, &val) != 1) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); *************** parseVxidFromText(const char *prefix, ch *** 1408,1414 **** (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); ptr += prefixlen; ! if (sscanf(ptr, "%d/%u", &vxid->backendId, &vxid->localTransactionId) != 2) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); --- 1409,1415 ---- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); ptr += prefixlen; ! if (sscanf(ptr, "%d/" XID_FMT, &vxid->backendId, &vxid->localTransactionId) != 2) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c new file mode 100644 index bbac408..f3eced3 *** a/src/backend/utils/time/tqual.c --- b/src/backend/utils/time/tqual.c *************** HeapTupleSatisfiesSelf(HeapTuple htup, S *** 226,232 **** } } } ! else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; --- 226,232 ---- } } } ! else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; *************** HeapTupleSatisfiesSelf(HeapTuple htup, S *** 238,244 **** { TransactionId xmax; ! xmax = HeapTupleGetUpdateXid(tuple); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); --- 238,244 ---- { TransactionId xmax; ! xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); *************** HeapTupleSatisfiesSelf(HeapTuple htup, S *** 250,256 **** return false; } ! 
if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, --- 250,256 ---- return false; } ! if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, *************** HeapTupleSatisfiesSelf(HeapTuple htup, S *** 260,270 **** return false; } ! else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) return false; ! else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, ! HeapTupleHeaderGetRawXmin(tuple)); else { /* it must have aborted or crashed */ --- 260,270 ---- return false; } ! else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) return false; ! else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, ! HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ *************** HeapTupleSatisfiesSelf(HeapTuple htup, S *** 293,299 **** if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; ! xmax = HeapTupleGetUpdateXid(tuple); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); --- 293,299 ---- if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; ! xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); *************** HeapTupleSatisfiesSelf(HeapTuple htup, S *** 308,324 **** return true; } ! if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; return false; } ! if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) return true; ! 
if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, --- 308,324 ---- return true; } ! if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; return false; } ! if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return true; ! if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, *************** HeapTupleSatisfiesSelf(HeapTuple htup, S *** 336,342 **** } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, ! HeapTupleHeaderGetRawXmax(tuple)); return false; } --- 336,342 ---- } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, ! HeapTupleGetRawXmax(htup)); return false; } *************** HeapTupleSatisfiesToast(HeapTuple htup, *** 423,429 **** * is canceled by super-deleting the tuple. This also applies to * TOAST tuples created during speculative insertion. */ ! else if (!TransactionIdIsValid(HeapTupleHeaderGetXmin(tuple))) return false; } --- 423,429 ---- * is canceled by super-deleting the tuple. This also applies to * TOAST tuples created during speculative insertion. */ ! else if (!TransactionIdIsValid(HeapTupleGetXmin(htup))) return false; } *************** HeapTupleSatisfiesUpdate(HeapTuple htup, *** 511,517 **** } } } ! else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) { if (HeapTupleHeaderGetCmin(tuple) >= curcid) return HeapTupleInvisible; /* inserted after scan started */ --- 511,517 ---- } } } ! else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (HeapTupleHeaderGetCmin(tuple) >= curcid) return HeapTupleInvisible; /* inserted after scan started */ *************** HeapTupleSatisfiesUpdate(HeapTuple htup, *** 523,529 **** { TransactionId xmax; ! 
xmax = HeapTupleHeaderGetRawXmax(tuple); /* * Careful here: even though this tuple was created by our own --- 523,529 ---- { TransactionId xmax; ! xmax = HeapTupleGetRawXmax(htup); /* * Careful here: even though this tuple was created by our own *************** HeapTupleSatisfiesUpdate(HeapTuple htup, *** 554,560 **** { TransactionId xmax; ! xmax = HeapTupleGetUpdateXid(tuple); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); --- 554,560 ---- { TransactionId xmax; ! xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); *************** HeapTupleSatisfiesUpdate(HeapTuple htup, *** 562,568 **** /* deleting subtransaction must have aborted */ if (!TransactionIdIsCurrentTransactionId(xmax)) { ! if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) return HeapTupleBeingUpdated; return HeapTupleMayBeUpdated; --- 562,568 ---- /* deleting subtransaction must have aborted */ if (!TransactionIdIsCurrentTransactionId(xmax)) { ! if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return HeapTupleBeingUpdated; return HeapTupleMayBeUpdated; *************** HeapTupleSatisfiesUpdate(HeapTuple htup, *** 578,584 **** } } ! if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, --- 578,584 ---- } } ! if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, *************** HeapTupleSatisfiesUpdate(HeapTuple htup, *** 591,601 **** else return HeapTupleInvisible; /* updated before scan started */ } ! else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) return HeapTupleInvisible; ! else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, ! 
HeapTupleHeaderGetRawXmin(tuple)); else { /* it must have aborted or crashed */ --- 591,601 ---- else return HeapTupleInvisible; /* updated before scan started */ } ! else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) return HeapTupleInvisible; ! else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, ! HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ *************** HeapTupleSatisfiesUpdate(HeapTuple htup, *** 626,642 **** if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) { ! if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true)) return HeapTupleBeingUpdated; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); return HeapTupleMayBeUpdated; } ! xmax = HeapTupleGetUpdateXid(tuple); if (!TransactionIdIsValid(xmax)) { ! if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) return HeapTupleBeingUpdated; } --- 626,642 ---- if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) { ! if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), true)) return HeapTupleBeingUpdated; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); return HeapTupleMayBeUpdated; } ! xmax = HeapTupleGetUpdateXid(htup); if (!TransactionIdIsValid(xmax)) { ! if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return HeapTupleBeingUpdated; } *************** HeapTupleSatisfiesUpdate(HeapTuple htup, *** 651,657 **** return HeapTupleInvisible; /* updated before scan started */ } ! if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) return HeapTupleBeingUpdated; if (TransactionIdDidCommit(xmax)) --- 651,657 ---- return HeapTupleInvisible; /* updated before scan started */ } ! if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return HeapTupleBeingUpdated; if (TransactionIdDidCommit(xmax)) *************** HeapTupleSatisfiesUpdate(HeapTuple htup, *** 662,668 **** * what about the other members? */ ! 
if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) { /* * There's no member, even just a locker, alive anymore, so we can --- 662,668 ---- * what about the other members? */ ! if (!MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) { /* * There's no member, even just a locker, alive anymore, so we can *************** HeapTupleSatisfiesUpdate(HeapTuple htup, *** 679,685 **** } } ! if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return HeapTupleBeingUpdated; --- 679,685 ---- } } ! if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return HeapTupleBeingUpdated; *************** HeapTupleSatisfiesUpdate(HeapTuple htup, *** 689,698 **** return HeapTupleInvisible; /* updated before scan started */ } ! if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) return HeapTupleBeingUpdated; ! if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, --- 689,698 ---- return HeapTupleInvisible; /* updated before scan started */ } ! if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return HeapTupleBeingUpdated; ! if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, *************** HeapTupleSatisfiesUpdate(HeapTuple htup, *** 710,716 **** } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, ! HeapTupleHeaderGetRawXmax(tuple)); return HeapTupleUpdated; /* updated by other */ } --- 710,716 ---- } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, ! HeapTupleGetRawXmax(htup)); return HeapTupleUpdated; /* updated by other */ } *************** HeapTupleSatisfiesDirty(HeapTuple htup, *** 793,799 **** } } } ! 
else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; --- 793,799 ---- } } } ! else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; *************** HeapTupleSatisfiesDirty(HeapTuple htup, *** 805,811 **** { TransactionId xmax; ! xmax = HeapTupleGetUpdateXid(tuple); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); --- 805,811 ---- { TransactionId xmax; ! xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); *************** HeapTupleSatisfiesDirty(HeapTuple htup, *** 817,823 **** return false; } ! if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, --- 817,823 ---- return false; } ! if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, *************** HeapTupleSatisfiesDirty(HeapTuple htup, *** 827,833 **** return false; } ! else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) { /* * Return the speculative token to caller. Caller can worry about --- 827,833 ---- return false; } ! else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) { /* * Return the speculative token to caller. Caller can worry about *************** HeapTupleSatisfiesDirty(HeapTuple htup, *** 843,855 **** Assert(snapshot->speculativeToken != 0); } ! snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple); /* XXX shouldn't we fall through to look at xmax? */ return true; /* in insertion by other */ } ! else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, ! 
HeapTupleHeaderGetRawXmin(tuple)); else { /* it must have aborted or crashed */ --- 843,855 ---- Assert(snapshot->speculativeToken != 0); } ! snapshot->xmin = HeapTupleGetRawXmin(htup); /* XXX shouldn't we fall through to look at xmax? */ return true; /* in insertion by other */ } ! else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, ! HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ *************** HeapTupleSatisfiesDirty(HeapTuple htup, *** 878,884 **** if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; ! xmax = HeapTupleGetUpdateXid(tuple); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); --- 878,884 ---- if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; ! xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); *************** HeapTupleSatisfiesDirty(HeapTuple htup, *** 896,916 **** return true; } ! if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; return false; } ! if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) { if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) ! snapshot->xmax = HeapTupleHeaderGetRawXmax(tuple); return true; } ! if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, --- 896,916 ---- return true; } ! if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; return false; } ! if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) { if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) ! snapshot->xmax = HeapTupleGetRawXmax(htup); return true; } ! 
if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, *************** HeapTupleSatisfiesDirty(HeapTuple htup, *** 928,934 **** } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, ! HeapTupleHeaderGetRawXmax(tuple)); return false; /* updated by other */ } --- 928,934 ---- } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, ! HeapTupleGetRawXmax(htup)); return false; /* updated by other */ } *************** HeapTupleSatisfiesMVCC(HeapTuple htup, S *** 1014,1020 **** } } } ! else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) { if (HeapTupleHeaderGetCmin(tuple) >= snapshot->curcid) return false; /* inserted after scan started */ --- 1014,1020 ---- } } } ! else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (HeapTupleHeaderGetCmin(tuple) >= snapshot->curcid) return false; /* inserted after scan started */ *************** HeapTupleSatisfiesMVCC(HeapTuple htup, S *** 1029,1035 **** { TransactionId xmax; ! xmax = HeapTupleGetUpdateXid(tuple); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); --- 1029,1035 ---- { TransactionId xmax; ! xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); *************** HeapTupleSatisfiesMVCC(HeapTuple htup, S *** 1043,1049 **** return false; /* updated before scan started */ } ! if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, --- 1043,1049 ---- return false; /* updated before scan started */ } ! 
if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, *************** HeapTupleSatisfiesMVCC(HeapTuple htup, S *** 1056,1066 **** else return false; /* deleted before scan started */ } ! else if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot)) return false; ! else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, ! HeapTupleHeaderGetRawXmin(tuple)); else { /* it must have aborted or crashed */ --- 1056,1066 ---- else return false; /* deleted before scan started */ } ! else if (XidInMVCCSnapshot(HeapTupleGetRawXmin(htup), snapshot)) return false; ! else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, ! HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ *************** HeapTupleSatisfiesMVCC(HeapTuple htup, S *** 1073,1079 **** { /* xmin is committed, but maybe not according to our snapshot */ if (!HeapTupleHeaderXminFrozen(tuple) && ! XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot)) return false; /* treat as still in progress */ } --- 1073,1079 ---- { /* xmin is committed, but maybe not according to our snapshot */ if (!HeapTupleHeaderXminFrozen(tuple) && ! XidInMVCCSnapshot(HeapTupleGetRawXmin(htup), snapshot)) return false; /* treat as still in progress */ } *************** HeapTupleSatisfiesMVCC(HeapTuple htup, S *** 1092,1098 **** /* already checked above */ Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); ! xmax = HeapTupleGetUpdateXid(tuple); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); --- 1092,1098 ---- /* already checked above */ Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); ! 
xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); *************** HeapTupleSatisfiesMVCC(HeapTuple htup, S *** 1114,1120 **** if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { ! if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) { if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) return true; /* deleted after scan started */ --- 1114,1120 ---- if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { ! if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) return true; /* deleted after scan started */ *************** HeapTupleSatisfiesMVCC(HeapTuple htup, S *** 1122,1131 **** return false; /* deleted before scan started */ } ! if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot)) return true; ! if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, --- 1122,1131 ---- return false; /* deleted before scan started */ } ! if (XidInMVCCSnapshot(HeapTupleGetRawXmax(htup), snapshot)) return true; ! if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, *************** HeapTupleSatisfiesMVCC(HeapTuple htup, S *** 1135,1146 **** /* xmax transaction committed */ SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, ! HeapTupleHeaderGetRawXmax(tuple)); } else { /* xmax is committed, but maybe not according to our snapshot */ ! if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot)) return true; /* treat as still in progress */ } --- 1135,1146 ---- /* xmax transaction committed */ SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, ! HeapTupleGetRawXmax(htup)); } else { /* xmax is committed, but maybe not according to our snapshot */ ! 
if (XidInMVCCSnapshot(HeapTupleGetRawXmax(htup), snapshot)) return true; /* treat as still in progress */ } *************** HeapTupleSatisfiesVacuum(HeapTuple htup, *** 1218,1238 **** return HEAPTUPLE_DEAD; } } ! else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return HEAPTUPLE_INSERT_IN_PROGRESS; /* only locked? run infomask-only check first, for performance */ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) || ! HeapTupleHeaderIsOnlyLocked(tuple)) return HEAPTUPLE_INSERT_IN_PROGRESS; /* inserted and then deleted by same xact */ ! if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple))) return HEAPTUPLE_DELETE_IN_PROGRESS; /* deleting subtransaction must have aborted */ return HEAPTUPLE_INSERT_IN_PROGRESS; } ! else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) { /* * It'd be possible to discern between INSERT/DELETE in progress --- 1218,1238 ---- return HEAPTUPLE_DEAD; } } ! else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return HEAPTUPLE_INSERT_IN_PROGRESS; /* only locked? run infomask-only check first, for performance */ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) || ! HeapTupleHeaderIsOnlyLocked(htup)) return HEAPTUPLE_INSERT_IN_PROGRESS; /* inserted and then deleted by same xact */ ! if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(htup))) return HEAPTUPLE_DELETE_IN_PROGRESS; /* deleting subtransaction must have aborted */ return HEAPTUPLE_INSERT_IN_PROGRESS; } ! else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) { /* * It'd be possible to discern between INSERT/DELETE in progress *************** HeapTupleSatisfiesVacuum(HeapTuple htup, *** 1244,1252 **** */ return HEAPTUPLE_INSERT_IN_PROGRESS; } ! 
else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, ! HeapTupleHeaderGetRawXmin(tuple)); else { /* --- 1244,1252 ---- */ return HEAPTUPLE_INSERT_IN_PROGRESS; } ! else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, ! HeapTupleGetRawXmin(htup)); else { /* *************** HeapTupleSatisfiesVacuum(HeapTuple htup, *** 1288,1301 **** * possibly be running; otherwise have to check. */ if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) && ! MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true)) return HEAPTUPLE_LIVE; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); } else { ! if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) return HEAPTUPLE_LIVE; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); --- 1288,1301 ---- * possibly be running; otherwise have to check. */ if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) && ! MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), true)) return HEAPTUPLE_LIVE; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); } else { ! if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return HEAPTUPLE_LIVE; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); *************** HeapTupleSatisfiesVacuum(HeapTuple htup, *** 1315,1326 **** { TransactionId xmax; ! if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) { /* already checked above */ Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); ! xmax = HeapTupleGetUpdateXid(tuple); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); --- 1315,1326 ---- { TransactionId xmax; ! if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) { /* already checked above */ Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); ! 
xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); *************** HeapTupleSatisfiesVacuum(HeapTuple htup, *** 1336,1342 **** Assert(!(tuple->t_infomask & HEAP_XMAX_COMMITTED)); ! xmax = HeapTupleGetUpdateXid(tuple); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); --- 1336,1342 ---- Assert(!(tuple->t_infomask & HEAP_XMAX_COMMITTED)); ! xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); *************** HeapTupleSatisfiesVacuum(HeapTuple htup, *** 1361,1371 **** if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { ! if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) return HEAPTUPLE_DELETE_IN_PROGRESS; ! else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, ! HeapTupleHeaderGetRawXmax(tuple)); else { /* --- 1361,1371 ---- if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { ! if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return HEAPTUPLE_DELETE_IN_PROGRESS; ! else if (TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, ! HeapTupleGetRawXmax(htup)); else { /* *************** HeapTupleSatisfiesVacuum(HeapTuple htup, *** 1387,1393 **** * Deleter committed, but perhaps it was recent enough that some open * transactions could still see the tuple. */ ! if (!TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin)) return HEAPTUPLE_RECENTLY_DEAD; /* Otherwise, it's dead and removable */ --- 1387,1393 ---- * Deleter committed, but perhaps it was recent enough that some open * transactions could still see the tuple. */ ! 
if (!TransactionIdPrecedes(HeapTupleGetRawXmax(htup), OldestXmin)) return HEAPTUPLE_RECENTLY_DEAD; /* Otherwise, it's dead and removable */ *************** HeapTupleIsSurelyDead(HeapTuple htup, Tr *** 1469,1475 **** return false; /* Deleter committed, so tuple is dead if the XID is old enough. */ ! return TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin); } /* --- 1469,1475 ---- return false; /* Deleter committed, so tuple is dead if the XID is old enough. */ ! return TransactionIdPrecedes(HeapTupleGetRawXmax(htup), OldestXmin); } /* *************** XidInMVCCSnapshot(TransactionId xid, Sna *** 1604,1611 **** * laid out at the top of this file. */ bool ! HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) { TransactionId xmax; /* if there's no valid Xmax, then there's obviously no update either */ --- 1604,1612 ---- * laid out at the top of this file. */ bool ! HeapTupleHeaderIsOnlyLocked(HeapTuple htup) { + HeapTupleHeader tuple = htup->t_data; TransactionId xmax; /* if there's no valid Xmax, then there's obviously no update either */ *************** HeapTupleHeaderIsOnlyLocked(HeapTupleHea *** 1616,1622 **** return true; /* invalid xmax means no update */ ! if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple))) return true; /* --- 1617,1623 ---- return true; /* invalid xmax means no update */ ! if (!TransactionIdIsValid(HeapTupleGetRawXmax(htup))) return true; /* *************** HeapTupleHeaderIsOnlyLocked(HeapTupleHea *** 1627,1633 **** return false; /* ... but if it's a multi, then perhaps the updating Xid aborted. */ ! xmax = HeapTupleGetUpdateXid(tuple); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); --- 1628,1634 ---- return false; /* ... but if it's a multi, then perhaps the updating Xid aborted. */ ! 
xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); *************** HeapTupleSatisfiesHistoricMVCC(HeapTuple *** 1675,1682 **** Buffer buffer) { HeapTupleHeader tuple = htup->t_data; ! TransactionId xmin = HeapTupleHeaderGetXmin(tuple); ! TransactionId xmax = HeapTupleHeaderGetRawXmax(tuple); Assert(ItemPointerIsValid(&htup->t_self)); Assert(htup->t_tableOid != InvalidOid); --- 1676,1683 ---- Buffer buffer) { HeapTupleHeader tuple = htup->t_data; ! TransactionId xmin = HeapTupleGetXmin(htup); ! TransactionId xmax = HeapTupleGetRawXmax(htup); Assert(ItemPointerIsValid(&htup->t_self)); Assert(htup->t_tableOid != InvalidOid); *************** HeapTupleSatisfiesHistoricMVCC(HeapTuple *** 1759,1765 **** */ else if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { ! xmax = HeapTupleGetUpdateXid(tuple); } /* check if it's one of our txids, toplevel is also in there */ --- 1760,1766 ---- */ else if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { ! xmax = HeapTupleGetUpdateXid(htup); } /* check if it's one of our txids, toplevel is also in there */ diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c new file mode 100644 index 2ea8931..439fcbd *** a/src/bin/pg_controldata/pg_controldata.c --- b/src/bin/pg_controldata/pg_controldata.c *************** main(int argc, char *argv[]) *** 225,252 **** ControlFile->checkPointCopy.PrevTimeLineID); printf(_("Latest checkpoint's full_page_writes: %s\n"), ControlFile->checkPointCopy.fullPageWrites ? _("on") : _("off")); ! printf(_("Latest checkpoint's NextXID: %u:%u\n"), ! ControlFile->checkPointCopy.nextXidEpoch, ControlFile->checkPointCopy.nextXid); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile->checkPointCopy.nextOid); ! printf(_("Latest checkpoint's NextMultiXactId: %u\n"), ControlFile->checkPointCopy.nextMulti); ! printf(_("Latest checkpoint's NextMultiOffset: %u\n"), ControlFile->checkPointCopy.nextMultiOffset); ! 
printf(_("Latest checkpoint's oldestXID: %u\n"), ControlFile->checkPointCopy.oldestXid); printf(_("Latest checkpoint's oldestXID's DB: %u\n"), ControlFile->checkPointCopy.oldestXidDB); ! printf(_("Latest checkpoint's oldestActiveXID: %u\n"), ControlFile->checkPointCopy.oldestActiveXid); ! printf(_("Latest checkpoint's oldestMultiXid: %u\n"), ControlFile->checkPointCopy.oldestMulti); printf(_("Latest checkpoint's oldestMulti's DB: %u\n"), ControlFile->checkPointCopy.oldestMultiDB); ! printf(_("Latest checkpoint's oldestCommitTsXid:%u\n"), ControlFile->checkPointCopy.oldestCommitTsXid); ! printf(_("Latest checkpoint's newestCommitTsXid:%u\n"), ControlFile->checkPointCopy.newestCommitTsXid); printf(_("Time of latest checkpoint: %s\n"), ckpttime_str); --- 225,251 ---- ControlFile->checkPointCopy.PrevTimeLineID); printf(_("Latest checkpoint's full_page_writes: %s\n"), ControlFile->checkPointCopy.fullPageWrites ? _("on") : _("off")); ! printf(_("Latest checkpoint's NextXID: " XID_FMT "\n"), ControlFile->checkPointCopy.nextXid); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile->checkPointCopy.nextOid); ! printf(_("Latest checkpoint's NextMultiXactId: " XID_FMT "\n"), ControlFile->checkPointCopy.nextMulti); ! printf(_("Latest checkpoint's NextMultiOffset: " INT64_FORMAT "\n"), ControlFile->checkPointCopy.nextMultiOffset); ! printf(_("Latest checkpoint's oldestXID: " XID_FMT "\n"), ControlFile->checkPointCopy.oldestXid); printf(_("Latest checkpoint's oldestXID's DB: %u\n"), ControlFile->checkPointCopy.oldestXidDB); ! printf(_("Latest checkpoint's oldestActiveXID: " XID_FMT "\n"), ControlFile->checkPointCopy.oldestActiveXid); ! printf(_("Latest checkpoint's oldestMultiXid: " XID_FMT "\n"), ControlFile->checkPointCopy.oldestMulti); printf(_("Latest checkpoint's oldestMulti's DB: %u\n"), ControlFile->checkPointCopy.oldestMultiDB); ! printf(_("Latest checkpoint's oldestCommitTsXid:" XID_FMT "\n"), ControlFile->checkPointCopy.oldestCommitTsXid); ! 
printf(_("Latest checkpoint's newestCommitTsXid:" XID_FMT "\n"), ControlFile->checkPointCopy.newestCommitTsXid); printf(_("Time of latest checkpoint: %s\n"), ckpttime_str); diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c new file mode 100644 index ac67831..2e40757 *** a/src/bin/pg_resetwal/pg_resetwal.c --- b/src/bin/pg_resetwal/pg_resetwal.c *************** static ControlFileData ControlFile; /* p *** 61,67 **** static XLogSegNo newXlogSegNo; /* new XLOG segment # */ static bool guessed = false; /* T if we had to guess at any values */ static const char *progname; - static uint32 set_xid_epoch = (uint32) -1; static TransactionId set_xid = 0; static TransactionId set_oldest_commit_ts_xid = 0; static TransactionId set_newest_commit_ts_xid = 0; --- 61,66 ---- *************** static void KillExistingXLOG(void); *** 82,87 **** --- 81,87 ---- static void KillExistingArchiveStatus(void); static void WriteEmptyXLOG(void); static void usage(void); + static uint64 str2uint64(const char *str, char **endptr, int base); int *************** main(int argc, char *argv[]) *** 115,121 **** } ! while ((c = getopt(argc, argv, "c:D:e:fl:m:no:O:x:")) != -1) { switch (c) { --- 115,121 ---- } ! while ((c = getopt(argc, argv, "c:D:fl:m:no:O:x:")) != -1) { switch (c) { *************** main(int argc, char *argv[]) *** 131,155 **** noupdate = true; break; - case 'e': - set_xid_epoch = strtoul(optarg, &endptr, 0); - if (endptr == optarg || *endptr != '\0') - { - /*------ - translator: the second %s is a command line argument (-e, etc) */ - fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-e"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); - exit(1); - } - if (set_xid_epoch == -1) - { - fprintf(stderr, _("%s: transaction ID epoch (-e) must not be -1\n"), progname); - exit(1); - } - break; - case 'x': ! 
set_xid = strtoul(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0') { fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-x"); --- 131,138 ---- noupdate = true; break; case 'x': ! set_xid = str2uint64(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0') { fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-x"); *************** main(int argc, char *argv[]) *** 164,177 **** break; case 'c': ! set_oldest_commit_ts_xid = strtoul(optarg, &endptr, 0); if (endptr == optarg || *endptr != ',') { fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-c"); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); } ! set_newest_commit_ts_xid = strtoul(endptr + 1, &endptr2, 0); if (endptr2 == endptr + 1 || *endptr2 != '\0') { fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-c"); --- 147,160 ---- break; case 'c': ! set_oldest_commit_ts_xid = str2uint64(optarg, &endptr, 0); if (endptr == optarg || *endptr != ',') { fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-c"); fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname); exit(1); } ! set_newest_commit_ts_xid = str2uint64(endptr + 1, &endptr2, 0); if (endptr2 == endptr + 1 || *endptr2 != '\0') { fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-c"); *************** main(int argc, char *argv[]) *** 210,216 **** break; case 'm': ! set_mxid = strtoul(optarg, &endptr, 0); if (endptr == optarg || *endptr != ',') { fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-m"); --- 193,199 ---- break; case 'm': ! set_mxid = str2uint64(optarg, &endptr, 0); if (endptr == optarg || *endptr != ',') { fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-m"); *************** main(int argc, char *argv[]) *** 218,224 **** exit(1); } ! 
set_oldestmxid = strtoul(endptr + 1, &endptr2, 0); if (endptr2 == endptr + 1 || *endptr2 != '\0') { fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-m"); --- 201,207 ---- exit(1); } ! set_oldestmxid = str2uint64(endptr + 1, &endptr2, 0); if (endptr2 == endptr + 1 || *endptr2 != '\0') { fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-m"); *************** main(int argc, char *argv[]) *** 244,250 **** break; case 'O': ! set_mxoff = strtoul(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0') { fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-O"); --- 227,233 ---- break; case 'O': ! set_mxoff = str2uint64(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0') { fprintf(stderr, _("%s: invalid argument for option %s\n"), progname, "-O"); *************** main(int argc, char *argv[]) *** 362,374 **** if ((guessed && !force) || noupdate) PrintControlValues(guessed); - /* - * Adjust fields if required by switches. (Do this now so that printout, - * if any, includes these values.) - */ - if (set_xid_epoch != -1) - ControlFile.checkPointCopy.nextXidEpoch = set_xid_epoch; - if (set_xid != 0) { ControlFile.checkPointCopy.nextXid = set_xid; --- 345,350 ---- *************** GuessControlValues(void) *** 628,634 **** ControlFile.checkPointCopy.ThisTimeLineID = 1; ControlFile.checkPointCopy.PrevTimeLineID = 1; ControlFile.checkPointCopy.fullPageWrites = false; - ControlFile.checkPointCopy.nextXidEpoch = 0; ControlFile.checkPointCopy.nextXid = FirstNormalTransactionId; ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId; ControlFile.checkPointCopy.nextMulti = FirstMultiXactId; --- 604,609 ---- *************** PrintControlValues(bool guessed) *** 708,735 **** ControlFile.checkPointCopy.ThisTimeLineID); printf(_("Latest checkpoint's full_page_writes: %s\n"), ControlFile.checkPointCopy.fullPageWrites ? _("on") : _("off")); ! printf(_("Latest checkpoint's NextXID: %u:%u\n"), ! 
ControlFile.checkPointCopy.nextXidEpoch, ControlFile.checkPointCopy.nextXid); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid); ! printf(_("Latest checkpoint's NextMultiXactId: %u\n"), ControlFile.checkPointCopy.nextMulti); ! printf(_("Latest checkpoint's NextMultiOffset: %u\n"), ControlFile.checkPointCopy.nextMultiOffset); ! printf(_("Latest checkpoint's oldestXID: %u\n"), ControlFile.checkPointCopy.oldestXid); printf(_("Latest checkpoint's oldestXID's DB: %u\n"), ControlFile.checkPointCopy.oldestXidDB); ! printf(_("Latest checkpoint's oldestActiveXID: %u\n"), ControlFile.checkPointCopy.oldestActiveXid); ! printf(_("Latest checkpoint's oldestMultiXid: %u\n"), ControlFile.checkPointCopy.oldestMulti); printf(_("Latest checkpoint's oldestMulti's DB: %u\n"), ControlFile.checkPointCopy.oldestMultiDB); ! printf(_("Latest checkpoint's oldestCommitTsXid:%u\n"), ControlFile.checkPointCopy.oldestCommitTsXid); ! printf(_("Latest checkpoint's newestCommitTsXid:%u\n"), ControlFile.checkPointCopy.newestCommitTsXid); printf(_("Maximum data alignment: %u\n"), ControlFile.maxAlign); --- 683,709 ---- ControlFile.checkPointCopy.ThisTimeLineID); printf(_("Latest checkpoint's full_page_writes: %s\n"), ControlFile.checkPointCopy.fullPageWrites ? _("on") : _("off")); ! printf(_("Latest checkpoint's NextXID: " XID_FMT "\n"), ControlFile.checkPointCopy.nextXid); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid); ! printf(_("Latest checkpoint's NextMultiXactId: " XID_FMT "\n"), ControlFile.checkPointCopy.nextMulti); ! printf(_("Latest checkpoint's NextMultiOffset: " INT64_FORMAT "\n"), ControlFile.checkPointCopy.nextMultiOffset); ! printf(_("Latest checkpoint's oldestXID: " XID_FMT "\n"), ControlFile.checkPointCopy.oldestXid); printf(_("Latest checkpoint's oldestXID's DB: %u\n"), ControlFile.checkPointCopy.oldestXidDB); ! 
printf(_("Latest checkpoint's oldestActiveXID: " XID_FMT "\n"), ControlFile.checkPointCopy.oldestActiveXid); ! printf(_("Latest checkpoint's oldestMultiXid: " XID_FMT "\n"), ControlFile.checkPointCopy.oldestMulti); printf(_("Latest checkpoint's oldestMulti's DB: %u\n"), ControlFile.checkPointCopy.oldestMultiDB); ! printf(_("Latest checkpoint's oldestCommitTsXid:" XID_FMT "\n"), ControlFile.checkPointCopy.oldestCommitTsXid); ! printf(_("Latest checkpoint's newestCommitTsXid:" XID_FMT "\n"), ControlFile.checkPointCopy.newestCommitTsXid); printf(_("Maximum data alignment: %u\n"), ControlFile.maxAlign); *************** PrintNewControlValues(void) *** 778,786 **** if (set_mxid != 0) { ! printf(_("NextMultiXactId: %u\n"), ControlFile.checkPointCopy.nextMulti); ! printf(_("OldestMultiXid: %u\n"), ControlFile.checkPointCopy.oldestMulti); printf(_("OldestMulti's DB: %u\n"), ControlFile.checkPointCopy.oldestMultiDB); --- 752,760 ---- if (set_mxid != 0) { ! printf(_("NextMultiXactId: " XID_FMT "\n"), ControlFile.checkPointCopy.nextMulti); ! printf(_("OldestMultiXid: " XID_FMT "\n"), ControlFile.checkPointCopy.oldestMulti); printf(_("OldestMulti's DB: %u\n"), ControlFile.checkPointCopy.oldestMultiDB); *************** PrintNewControlValues(void) *** 788,794 **** if (set_mxoff != -1) { ! printf(_("NextMultiOffset: %u\n"), ControlFile.checkPointCopy.nextMultiOffset); } --- 762,768 ---- if (set_mxoff != -1) { ! printf(_("NextMultiOffset: " INT64_FORMAT "\n"), ControlFile.checkPointCopy.nextMultiOffset); } *************** PrintNewControlValues(void) *** 800,827 **** if (set_xid != 0) { ! printf(_("NextXID: %u\n"), ControlFile.checkPointCopy.nextXid); ! printf(_("OldestXID: %u\n"), ControlFile.checkPointCopy.oldestXid); printf(_("OldestXID's DB: %u\n"), ControlFile.checkPointCopy.oldestXidDB); } - if (set_xid_epoch != -1) - { - printf(_("NextXID epoch: %u\n"), - ControlFile.checkPointCopy.nextXidEpoch); - } - if (set_oldest_commit_ts_xid != 0) { ! 
printf(_("oldestCommitTsXid: %u\n"), ControlFile.checkPointCopy.oldestCommitTsXid); } if (set_newest_commit_ts_xid != 0) { ! printf(_("newestCommitTsXid: %u\n"), ControlFile.checkPointCopy.newestCommitTsXid); } } --- 774,795 ---- if (set_xid != 0) { ! printf(_("NextXID: " XID_FMT "\n"), ControlFile.checkPointCopy.nextXid); ! printf(_("OldestXID: " XID_FMT "\n"), ControlFile.checkPointCopy.oldestXid); printf(_("OldestXID's DB: %u\n"), ControlFile.checkPointCopy.oldestXidDB); } if (set_oldest_commit_ts_xid != 0) { ! printf(_("oldestCommitTsXid: " XID_FMT "\n"), ControlFile.checkPointCopy.oldestCommitTsXid); } if (set_newest_commit_ts_xid != 0) { ! printf(_("newestCommitTsXid: " XID_FMT "\n"), ControlFile.checkPointCopy.newestCommitTsXid); } } *************** usage(void) *** 1234,1240 **** printf(_(" -c XID,XID set oldest and newest transactions bearing commit timestamp\n")); printf(_(" (zero in either value means no change)\n")); printf(_(" [-D] DATADIR data directory\n")); - printf(_(" -e XIDEPOCH set next transaction ID epoch\n")); printf(_(" -f force update to be done\n")); printf(_(" -l WALFILE force minimum WAL starting location for new write-ahead log\n")); printf(_(" -m MXID,MXID set next and oldest multitransaction ID\n")); --- 1202,1207 ---- *************** usage(void) *** 1246,1248 **** --- 1213,1233 ---- printf(_(" -?, --help show this help, then exit\n")); printf(_("\nReport bugs to .\n")); } + + + /* + * str2uint64() + * + * convert string to 64-bit unsigned int + */ + static uint64 + str2uint64(const char *str, char **endptr, int base) + { + #ifdef _MSC_VER /* MSVC only */ + return _strtoui64(str, endptr, base); + #elif defined(HAVE_STRTOULL) && SIZEOF_LONG < 8 + return strtoull(str, endptr, base); + #else + return strtoul(str, endptr, base); + #endif + } diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c new file mode 100644 index ca3db1a..9c6c84a *** a/src/bin/pg_upgrade/controldata.c --- 
b/src/bin/pg_upgrade/controldata.c *************** get_control_data(ClusterInfo *cluster, b *** 201,226 **** pg_fatal("%d: controldata retrieval problem\n", __LINE__); p++; /* remove ':' char */ ! cluster->controldata.chkpnt_nxtepoch = str2uint(p); ! ! /* ! * Delimiter changed from '/' to ':' in 9.6. We don't test for ! * the catalog version of the change because the catalog version ! * is pulled from pg_controldata too, and it isn't worth adding an ! * order dependency for this --- we just check the string. ! */ ! if (strchr(p, '/') != NULL) ! p = strchr(p, '/'); ! else if (GET_MAJOR_VERSION(cluster->major_version) >= 906) ! p = strchr(p, ':'); ! else ! p = NULL; ! ! if (p == NULL || strlen(p) <= 1) ! pg_fatal("%d: controldata retrieval problem\n", __LINE__); ! ! p++; /* remove '/' or ':' char */ ! cluster->controldata.chkpnt_nxtxid = str2uint(p); got_xid = true; } else if ((p = strstr(bufin, "Latest checkpoint's NextOID:")) != NULL) --- 201,207 ---- pg_fatal("%d: controldata retrieval problem\n", __LINE__); p++; /* remove ':' char */ ! cluster->controldata.chkpnt_nxtxid = str2uint64(p); got_xid = true; } else if ((p = strstr(bufin, "Latest checkpoint's NextOID:")) != NULL) *************** get_control_data(ClusterInfo *cluster, b *** 242,248 **** pg_fatal("%d: controldata retrieval problem\n", __LINE__); p++; /* remove ':' char */ ! cluster->controldata.chkpnt_nxtmulti = str2uint(p); got_multi = true; } else if ((p = strstr(bufin, "Latest checkpoint's oldestMultiXid:")) != NULL) --- 223,229 ---- pg_fatal("%d: controldata retrieval problem\n", __LINE__); p++; /* remove ':' char */ ! cluster->controldata.chkpnt_nxtmulti = str2uint64(p); got_multi = true; } else if ((p = strstr(bufin, "Latest checkpoint's oldestMultiXid:")) != NULL) *************** get_control_data(ClusterInfo *cluster, b *** 253,259 **** pg_fatal("%d: controldata retrieval problem\n", __LINE__); p++; /* remove ':' char */ ! 
cluster->controldata.chkpnt_oldstMulti = str2uint(p); got_oldestmulti = true; } else if ((p = strstr(bufin, "Latest checkpoint's NextMultiOffset:")) != NULL) --- 234,240 ---- pg_fatal("%d: controldata retrieval problem\n", __LINE__); p++; /* remove ':' char */ ! cluster->controldata.chkpnt_oldstMulti = str2uint64(p); got_oldestmulti = true; } else if ((p = strstr(bufin, "Latest checkpoint's NextMultiOffset:")) != NULL) *************** get_control_data(ClusterInfo *cluster, b *** 264,270 **** pg_fatal("%d: controldata retrieval problem\n", __LINE__); p++; /* remove ':' char */ ! cluster->controldata.chkpnt_nxtmxoff = str2uint(p); got_mxoff = true; } else if ((p = strstr(bufin, "First log segment after reset:")) != NULL) --- 245,251 ---- pg_fatal("%d: controldata retrieval problem\n", __LINE__); p++; /* remove ':' char */ ! cluster->controldata.chkpnt_nxtmxoff = str2uint64(p); got_mxoff = true; } else if ((p = strstr(bufin, "First log segment after reset:")) != NULL) diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c new file mode 100644 index d44fefb..a4f82d2 *** a/src/bin/pg_upgrade/pg_upgrade.c --- b/src/bin/pg_upgrade/pg_upgrade.c *************** copy_xact_xlog_xid(void) *** 419,434 **** /* set the next transaction id and epoch of the new cluster */ prep_status("Setting next transaction ID and epoch for new cluster"); exec_prog(UTILITY_LOG_FILE, NULL, true, ! "\"%s/pg_resetwal\" -f -x %u \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtxid, new_cluster.pgdata); exec_prog(UTILITY_LOG_FILE, NULL, true, "\"%s/pg_resetwal\" -f -e %u \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtepoch, new_cluster.pgdata); /* must reset commit timestamp limits also */ exec_prog(UTILITY_LOG_FILE, NULL, true, ! 
"\"%s/pg_resetwal\" -f -c %u,%u \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtxid, old_cluster.controldata.chkpnt_nxtxid, --- 419,436 ---- /* set the next transaction id and epoch of the new cluster */ prep_status("Setting next transaction ID and epoch for new cluster"); exec_prog(UTILITY_LOG_FILE, NULL, true, ! "\"%s/pg_resetwal\" -f -x " XID_FMT " \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtxid, new_cluster.pgdata); + #ifdef NOT_USED exec_prog(UTILITY_LOG_FILE, NULL, true, "\"%s/pg_resetwal\" -f -e %u \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtepoch, new_cluster.pgdata); + #endif /* must reset commit timestamp limits also */ exec_prog(UTILITY_LOG_FILE, NULL, true, ! "\"%s/pg_resetwal\" -f -c " XID_FMT "," XID_FMT " \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtxid, old_cluster.controldata.chkpnt_nxtxid, *************** copy_xact_xlog_xid(void) *** 454,460 **** * counters here and the oldest multi present on system. */ exec_prog(UTILITY_LOG_FILE, NULL, true, ! "\"%s/pg_resetwal\" -O %u -m %u,%u \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtmxoff, old_cluster.controldata.chkpnt_nxtmulti, --- 456,462 ---- * counters here and the oldest multi present on system. */ exec_prog(UTILITY_LOG_FILE, NULL, true, ! "\"%s/pg_resetwal\" -O " XID_FMT " -m " XID_FMT "," XID_FMT " \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtmxoff, old_cluster.controldata.chkpnt_nxtmulti, *************** copy_xact_xlog_xid(void) *** 482,488 **** * next=MaxMultiXactId, but multixact.c can cope with that just fine. */ exec_prog(UTILITY_LOG_FILE, NULL, true, ! "\"%s/pg_resetwal\" -m %u,%u \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtmulti + 1, old_cluster.controldata.chkpnt_nxtmulti, --- 484,490 ---- * next=MaxMultiXactId, but multixact.c can cope with that just fine. */ exec_prog(UTILITY_LOG_FILE, NULL, true, ! 
"\"%s/pg_resetwal\" -m " XID_FMT "," XID_FMT " \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtmulti + 1, old_cluster.controldata.chkpnt_nxtmulti, *************** set_frozenxids(bool minmxid_only) *** 533,545 **** /* set pg_database.datfrozenxid */ PQclear(executeQueryOrDie(conn_template1, "UPDATE pg_catalog.pg_database " ! "SET datfrozenxid = '%u'", old_cluster.controldata.chkpnt_nxtxid)); /* set pg_database.datminmxid */ PQclear(executeQueryOrDie(conn_template1, "UPDATE pg_catalog.pg_database " ! "SET datminmxid = '%u'", old_cluster.controldata.chkpnt_nxtmulti)); /* get database names */ --- 535,547 ---- /* set pg_database.datfrozenxid */ PQclear(executeQueryOrDie(conn_template1, "UPDATE pg_catalog.pg_database " ! "SET datfrozenxid = '" XID_FMT "'", old_cluster.controldata.chkpnt_nxtxid)); /* set pg_database.datminmxid */ PQclear(executeQueryOrDie(conn_template1, "UPDATE pg_catalog.pg_database " ! "SET datminmxid = '" XID_FMT "'", old_cluster.controldata.chkpnt_nxtmulti)); /* get database names */ *************** set_frozenxids(bool minmxid_only) *** 574,580 **** /* set pg_class.relfrozenxid */ PQclear(executeQueryOrDie(conn, "UPDATE pg_catalog.pg_class " ! "SET relfrozenxid = '%u' " /* only heap, materialized view, and TOAST are vacuumed */ "WHERE relkind IN (" CppAsString2(RELKIND_RELATION) ", " --- 576,582 ---- /* set pg_class.relfrozenxid */ PQclear(executeQueryOrDie(conn, "UPDATE pg_catalog.pg_class " ! "SET relfrozenxid = '" XID_FMT "' " /* only heap, materialized view, and TOAST are vacuumed */ "WHERE relkind IN (" CppAsString2(RELKIND_RELATION) ", " *************** set_frozenxids(bool minmxid_only) *** 585,591 **** /* set pg_class.relminmxid */ PQclear(executeQueryOrDie(conn, "UPDATE pg_catalog.pg_class " ! 
"SET relminmxid = '%u' " /* only heap, materialized view, and TOAST are vacuumed */ "WHERE relkind IN (" CppAsString2(RELKIND_RELATION) ", " --- 587,593 ---- /* set pg_class.relminmxid */ PQclear(executeQueryOrDie(conn, "UPDATE pg_catalog.pg_class " ! "SET relminmxid = '" XID_FMT "' " /* only heap, materialized view, and TOAST are vacuumed */ "WHERE relkind IN (" CppAsString2(RELKIND_RELATION) ", " diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h new file mode 100644 index e44c236..7634bdd *** a/src/bin/pg_upgrade/pg_upgrade.h --- b/src/bin/pg_upgrade/pg_upgrade.h *************** typedef struct *** 209,220 **** uint32 ctrl_ver; uint32 cat_ver; char nextxlogfile[25]; ! uint32 chkpnt_nxtxid; ! uint32 chkpnt_nxtepoch; uint32 chkpnt_nxtoid; ! uint32 chkpnt_nxtmulti; ! uint32 chkpnt_nxtmxoff; ! uint32 chkpnt_oldstMulti; uint32 align; uint32 blocksz; uint32 largesz; --- 209,219 ---- uint32 ctrl_ver; uint32 cat_ver; char nextxlogfile[25]; ! uint64 chkpnt_nxtxid; uint32 chkpnt_nxtoid; ! uint64 chkpnt_nxtmulti; ! uint64 chkpnt_nxtmxoff; ! uint64 chkpnt_oldstMulti; uint32 align; uint32 blocksz; uint32 largesz; *************** void end_progress_output(void); *** 434,439 **** --- 433,439 ---- void prep_status(const char *fmt,...) 
pg_attribute_printf(1, 2); void check_ok(void); unsigned int str2uint(const char *str); + uint64 str2uint64(const char *str); void pg_putenv(const char *var, const char *val); diff --git a/src/bin/pg_upgrade/util.c b/src/bin/pg_upgrade/util.c new file mode 100644 index 44c1bc8..e5d7bd3 *** a/src/bin/pg_upgrade/util.c --- b/src/bin/pg_upgrade/util.c *************** str2uint(const char *str) *** 245,250 **** --- 245,268 ---- /* + * str2uint64() + * + * convert string to 64-bit unsigned int + */ + uint64 + str2uint64(const char *str) + { + #ifdef _MSC_VER /* MSVC only */ + return _strtoui64(str, NULL, 10); + #elif defined(HAVE_STRTOULL) && SIZEOF_LONG < 8 + return strtoull(str, NULL, 10); + #else + return strtoul(str, NULL, 10); + #endif + } + + + /* * pg_putenv() * * This is like putenv(), but takes two arguments. diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c new file mode 100644 index 5aa3233..c6ae244 *** a/src/bin/pg_waldump/pg_waldump.c --- b/src/bin/pg_waldump/pg_waldump.c *************** XLogDumpDisplayRecord(XLogDumpConfig *co *** 452,458 **** if (id == NULL) id = psprintf("UNKNOWN (%x)", info & ~XLR_INFO_MASK); ! printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: %10u, lsn: %X/%08X, prev %X/%08X, ", desc->rm_name, rec_len, XLogRecGetTotalLen(record), XLogRecGetXid(record), --- 452,458 ---- if (id == NULL) id = psprintf("UNKNOWN (%x)", info & ~XLR_INFO_MASK); ! printf("rmgr: %-11s len (rec/tot): %6u/%6u, tx: " XID_FMT ", lsn: %X/%08X, prev %X/%08X, ", desc->rm_name, rec_len, XLogRecGetTotalLen(record), XLogRecGetXid(record), *************** main(int argc, char **argv) *** 868,874 **** exit(EXIT_SUCCESS); break; case 'x': ! if (sscanf(optarg, "%u", &config.filter_by_xid) != 1) { fprintf(stderr, _("%s: could not parse \"%s\" as a transaction ID\n"), progname, optarg); --- 868,874 ---- exit(EXIT_SUCCESS); break; case 'x': ! 
if (sscanf(optarg, XID_FMT, &config.filter_by_xid) != 1) { fprintf(stderr, _("%s: could not parse \"%s\" as a transaction ID\n"), progname, optarg); diff --git a/src/include/access/clog.h b/src/include/access/clog.h new file mode 100644 index 7bae090..8476503 *** a/src/include/access/clog.h --- b/src/include/access/clog.h *************** typedef int XidStatus; *** 30,36 **** typedef struct xl_clog_truncate { ! int pageno; TransactionId oldestXact; Oid oldestXactDb; } xl_clog_truncate; --- 30,36 ---- typedef struct xl_clog_truncate { ! int64 pageno; TransactionId oldestXact; Oid oldestXactDb; } xl_clog_truncate; diff --git a/src/include/access/commit_ts.h b/src/include/access/commit_ts.h new file mode 100644 index 31936fa..a4a5709 *** a/src/include/access/commit_ts.h --- b/src/include/access/commit_ts.h *************** typedef struct xl_commit_ts_set *** 63,69 **** typedef struct xl_commit_ts_truncate { ! int pageno; TransactionId oldestXid; } xl_commit_ts_truncate; --- 63,69 ---- typedef struct xl_commit_ts_truncate { ! 
int64 pageno; TransactionId oldestXid; } xl_commit_ts_truncate; diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h new file mode 100644 index 4e41024..f7f48f6 *** a/src/include/access/heapam.h --- b/src/include/access/heapam.h *************** extern void ReleaseBulkInsertStatePin(Bu *** 152,157 **** --- 152,161 ---- extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate); + extern bool heap_page_prepare_for_xid(Relation relation, Buffer buffer, + TransactionId xid, bool multi); + extern bool rewrite_page_prepare_for_xid(Page page, TransactionId xid, + bool multi); extern void heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, CommandId cid, int options, BulkInsertState bistate); extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, *************** extern HTSU_Result heap_lock_tuple(Relat *** 168,176 **** bool follow_update, Buffer *buffer, HeapUpdateFailureData *hufd); extern void heap_inplace_update(Relation relation, HeapTuple tuple); ! extern bool heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid, TransactionId cutoff_multi); ! extern bool heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, MultiXactId cutoff_multi, Buffer buf); extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); --- 172,180 ---- bool follow_update, Buffer *buffer, HeapUpdateFailureData *hufd); extern void heap_inplace_update(Relation relation, HeapTuple tuple); ! extern bool heap_freeze_tuple(HeapTuple tuple, TransactionId cutoff_xid, TransactionId cutoff_multi); ! 
extern bool heap_tuple_needs_freeze(HeapTuple htup, TransactionId cutoff_xid, MultiXactId cutoff_multi, Buffer buf); extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); *************** extern void heap_update_snapshot(HeapSca *** 186,196 **** extern void heap_page_prune_opt(Relation relation, Buffer buffer); extern int heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, ! bool report_stats, TransactionId *latestRemovedXid); extern void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, ! OffsetNumber *nowunused, int nunused); extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets); /* in heap/syncscan.c */ --- 190,202 ---- extern void heap_page_prune_opt(Relation relation, Buffer buffer); extern int heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin, ! bool report_stats, TransactionId *latestRemovedXid, ! bool repairFragmentation); extern void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, ! OffsetNumber *nowunused, int nunused, ! bool repairFragmentation); extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets); /* in heap/syncscan.c */ diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h new file mode 100644 index 81a6a39..e2896b2 *** a/src/include/access/heapam_xlog.h --- b/src/include/access/heapam_xlog.h *************** *** 32,38 **** #define XLOG_HEAP_INSERT 0x00 #define XLOG_HEAP_DELETE 0x10 #define XLOG_HEAP_UPDATE 0x20 ! /* 0x030 is free, was XLOG_HEAP_MOVE */ #define XLOG_HEAP_HOT_UPDATE 0x40 #define XLOG_HEAP_CONFIRM 0x50 #define XLOG_HEAP_LOCK 0x60 --- 32,38 ---- #define XLOG_HEAP_INSERT 0x00 #define XLOG_HEAP_DELETE 0x10 #define XLOG_HEAP_UPDATE 0x20 ! 
#define XLOG_HEAP_BASE_SHIFT 0x30 #define XLOG_HEAP_HOT_UPDATE 0x40 #define XLOG_HEAP_CONFIRM 0x50 #define XLOG_HEAP_LOCK 0x60 *************** typedef struct xl_heap_rewrite_mapping *** 367,373 **** XLogRecPtr start_lsn; /* Insert LSN at begin of rewrite */ } xl_heap_rewrite_mapping; ! extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, TransactionId *latestRemovedXid); extern void heap_redo(XLogReaderState *record); --- 367,383 ---- XLogRecPtr start_lsn; /* Insert LSN at begin of rewrite */ } xl_heap_rewrite_mapping; ! /* shift the base of xids on heap page */ ! typedef struct xl_heap_base_shift ! { ! int64 delta; /* delta value to shift the base */ ! bool multi; /* true to shift multixact base */ ! } xl_heap_base_shift; ! ! #define SizeOfHeapBaseShift (offsetof(xl_heap_base_shift, multi) + sizeof(bool)) ! ! ! extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTuple tuple, TransactionId *latestRemovedXid); extern void heap_redo(XLogReaderState *record); *************** extern XLogRecPtr log_heap_clean(Relatio *** 389,400 **** extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid, xl_heap_freeze_tuple *tuples, int ntuples); ! extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, bool *totally_frozen); ! extern void heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *xlrec_tp); extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer, TransactionId cutoff_xid, uint8 flags); --- 399,412 ---- extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid, xl_heap_freeze_tuple *tuples, int ntuples); ! extern bool heap_prepare_freeze_tuple(HeapTuple tuple, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, bool *totally_frozen); ! extern void heap_execute_freeze_tuple(HeapTuple tuple, ! 
xl_heap_freeze_tuple *xlrec_tp); ! extern void heap_execute_freeze_tuple_page(Page page, HeapTupleHeader tuple, xl_heap_freeze_tuple *xlrec_tp); extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer, TransactionId cutoff_xid, uint8 flags); diff --git a/src/include/access/hio.h b/src/include/access/hio.h new file mode 100644 index 4a8beb6..fcf764c *** a/src/include/access/hio.h --- b/src/include/access/hio.h *************** typedef struct BulkInsertStateData *** 36,42 **** extern void RelationPutHeapTuple(Relation relation, Buffer buffer, ! HeapTuple tuple, bool token); extern Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, --- 36,42 ---- extern void RelationPutHeapTuple(Relation relation, Buffer buffer, ! HeapTuple tuple, bool token, TransactionId xid); extern Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, diff --git a/src/include/access/htup.h b/src/include/access/htup.h new file mode 100644 index 61b3e68..a0db09f *** a/src/include/access/htup.h --- b/src/include/access/htup.h *************** typedef MinimalTupleData *MinimalTuple; *** 62,69 **** typedef struct HeapTupleData { uint32 t_len; /* length of *t_data */ - ItemPointerData t_self; /* SelfItemPointer */ Oid t_tableOid; /* table the tuple came from */ HeapTupleHeader t_data; /* -> tuple header and data */ } HeapTupleData; --- 62,71 ---- typedef struct HeapTupleData { uint32 t_len; /* length of *t_data */ Oid t_tableOid; /* table the tuple came from */ + TransactionId t_xid_base; + TransactionId t_multi_base; + ItemPointerData t_self; /* SelfItemPointer */ HeapTupleHeader t_data; /* -> tuple header and data */ } HeapTupleData; *************** typedef HeapTupleData *HeapTuple; *** 79,88 **** /* HeapTupleHeader functions implemented in utils/time/combocid.c */ extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup); extern 
CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup); ! extern void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, CommandId *cmax, bool *iscombo); /* Prototype for HeapTupleHeader accessors in heapam.c */ ! extern TransactionId HeapTupleGetUpdateXid(HeapTupleHeader tuple); #endif /* HTUP_H */ --- 81,90 ---- /* HeapTupleHeader functions implemented in utils/time/combocid.c */ extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup); extern CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup); ! extern void HeapTupleHeaderAdjustCmax(HeapTuple tup, CommandId *cmax, bool *iscombo); /* Prototype for HeapTupleHeader accessors in heapam.c */ ! extern TransactionId HeapTupleGetUpdateXid(HeapTuple tuple); #endif /* HTUP_H */ diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h new file mode 100644 index fa04a63..d2bce32 *** a/src/include/access/htup_details.h --- b/src/include/access/htup_details.h *************** *** 115,127 **** typedef struct HeapTupleFields { ! TransactionId t_xmin; /* inserting xact ID */ ! TransactionId t_xmax; /* deleting or locking xact ID */ union { CommandId t_cid; /* inserting or deleting command ID, or both */ ! TransactionId t_xvac; /* old-style VACUUM FULL xact ID */ } t_field3; } HeapTupleFields; --- 115,127 ---- typedef struct HeapTupleFields { ! ShortTransactionId t_xmin; /* inserting xact ID */ ! ShortTransactionId t_xmax; /* deleting or locking xact ID */ union { CommandId t_cid; /* inserting or deleting command ID, or both */ ! ShortTransactionId t_xvac; /* old-style VACUUM FULL xact ID */ } t_field3; } HeapTupleFields; *************** struct HeapTupleHeaderData *** 291,296 **** --- 291,314 ---- * macros evaluate their other argument only once. 
*/ + #define HeapTupleCopyBaseFromPage(tup, page) \ + { \ + (tup)->t_xid_base = HeapPageGetSpecial(page)->pd_xid_base; \ + (tup)->t_multi_base = HeapPageGetSpecial(page)->pd_multi_base; \ + } + + #define HeapTupleCopyBase(dest, src) \ + { \ + (dest)->t_xid_base = (src)->t_xid_base; \ + (dest)->t_multi_base = (src)->t_multi_base; \ + } + + #define HeapTupleSetZeroBase(tup) \ + { \ + (tup)->t_xid_base = InvalidTransactionId; \ + (tup)->t_multi_base = InvalidTransactionId; \ + } + /* * HeapTupleHeaderGetRawXmin returns the "raw" xmin field, which is the xid * originally used to insert the tuple. However, the tuple might actually *************** struct HeapTupleHeaderData *** 299,318 **** * the xmin to FrozenTransactionId, and that value may still be encountered * on disk. */ ! #define HeapTupleHeaderGetRawXmin(tup) \ ( \ ! (tup)->t_choice.t_heap.t_xmin \ ) ! #define HeapTupleHeaderGetXmin(tup) \ ( \ ! HeapTupleHeaderXminFrozen(tup) ? \ ! FrozenTransactionId : HeapTupleHeaderGetRawXmin(tup) \ ) ! #define HeapTupleHeaderSetXmin(tup, xid) \ ( \ ! (tup)->t_choice.t_heap.t_xmin = (xid) \ ) #define HeapTupleHeaderXminCommitted(tup) \ --- 317,341 ---- * the xmin to FrozenTransactionId, and that value may still be encountered * on disk. */ ! #define HeapTupleGetRawXmin(tup) \ ( \ ! ShortTransactionIdToNormal((tup)->t_xid_base, (tup)->t_data->t_choice.t_heap.t_xmin) \ ) ! #define HeapTupleGetXmin(tup) \ ( \ ! HeapTupleHeaderXminFrozen((tup)->t_data) ? \ ! FrozenTransactionId : HeapTupleGetRawXmin(tup) \ ) ! #define HeapTupleSetXmin(tup, xid) \ ( \ ! (tup)->t_data->t_choice.t_heap.t_xmin = NormalTransactionIdToShort((tup)->t_xid_base, (xid)) \ ! ) ! ! #define HeapTupleHeaderSetXmin(page, tup, xid) \ ! ( \ ! (tup)->t_choice.t_heap.t_xmin = NormalTransactionIdToShort(HeapPageGetSpecial(page)->pd_xid_base, (xid)) \ ) #define HeapTupleHeaderXminCommitted(tup) \ *************** struct HeapTupleHeaderData *** 356,379 **** * to resolve the MultiXactId if necessary.
This might involve multixact I/O, * so it should only be used if absolutely necessary. */ ! #define HeapTupleHeaderGetUpdateXid(tup) \ ( \ ! (!((tup)->t_infomask & HEAP_XMAX_INVALID) && \ ! ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) && \ ! !((tup)->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \ HeapTupleGetUpdateXid(tup) \ : \ ! HeapTupleHeaderGetRawXmax(tup) \ ) ! #define HeapTupleHeaderGetRawXmax(tup) \ ( \ ! (tup)->t_choice.t_heap.t_xmax \ ) ! #define HeapTupleHeaderSetXmax(tup, xid) \ ( \ ! (tup)->t_choice.t_heap.t_xmax = (xid) \ ) /* --- 379,422 ---- * to resolve the MultiXactId if necessary. This might involve multixact I/O, * so it should only be used if absolutely necessary. */ ! #define HeapTupleGetUpdateXidAny(tup) \ ( \ ! (!((tup)->t_data->t_infomask & HEAP_XMAX_INVALID) && \ ! ((tup)->t_data->t_infomask & HEAP_XMAX_IS_MULTI) && \ ! !((tup)->t_data->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \ HeapTupleGetUpdateXid(tup) \ : \ ! HeapTupleGetRawXmax(tup) \ ) ! #define HeapTupleGetRawXmax(tup) \ ( \ ! ShortTransactionIdToNormal( \ ! ((tup)->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ? (tup)->t_multi_base : (tup)->t_xid_base, \ ! (tup)->t_data->t_choice.t_heap.t_xmax) \ ) ! #define HeapTupleHeaderGetRawXmax(page, tup) \ ( \ ! ShortTransactionIdToNormal( \ ! ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) ? HeapPageGetSpecial(page)->pd_multi_base : HeapPageGetSpecial(page)->pd_xid_base, \ ! (tup)->t_choice.t_heap.t_xmax) \ ! ) ! ! #define HeapTupleSetXmax(tup, xid) \ ! ( \ ! (tup)->t_data->t_choice.t_heap.t_xmax = \ ! NormalTransactionIdToShort( \ ! ((tup)->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ? (tup)->t_multi_base : (tup)->t_xid_base, \ ! (xid)) \ ! ) ! ! #define HeapTupleHeaderSetXmax(page, tup, xid) \ ! ( \ ! (tup)->t_choice.t_heap.t_xmax = \ ! NormalTransactionIdToShort( \ ! ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) ? HeapPageGetSpecial(page)->pd_multi_base : HeapPageGetSpecial(page)->pd_xid_base, \ ! 
(xid)) \ ) /* diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h new file mode 100644 index d5e18c6..73c0328 *** a/src/include/access/multixact.h --- b/src/include/access/multixact.h *************** *** 17,32 **** /* * The first two MultiXactId values are reserved to store the truncation Xid ! * and epoch of the first segment, so we start assigning multixact values from * 2. */ ! #define InvalidMultiXactId ((MultiXactId) 0) ! #define FirstMultiXactId ((MultiXactId) 1) ! #define MaxMultiXactId ((MultiXactId) 0xFFFFFFFF) #define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId) ! #define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF) /* Number of SLRU buffers to use for multixact */ #define NUM_MXACTOFFSET_BUFFERS 8 --- 17,32 ---- /* * The first two MultiXactId values are reserved to store the truncation Xid ! * and base of the first segment, so we start assigning multixact values from * 2. */ ! #define InvalidMultiXactId UINT64CONST(0) ! #define FirstMultiXactId UINT64CONST(1) ! #define MaxMultiXactId UINT64CONST(0xFFFFFFFFFFFFFFFF) #define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId) ! 
#define MaxMultiXactOffset UINT64CONST(0xFFFFFFFFFFFFFFFF) /* Number of SLRU buffers to use for multixact */ #define NUM_MXACTOFFSET_BUFFERS 8 *************** extern bool MultiXactIdIsRunning(MultiXa *** 112,120 **** extern void MultiXactIdSetOldestMember(void); extern int GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **xids, bool allow_old, bool isLockOnly); - extern bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2); - extern bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, - MultiXactId multi2); extern void AtEOXact_MultiXact(void); extern void AtPrepare_MultiXact(void); --- 112,117 ---- *************** extern void MultiXactSetNextMXact(MultiX *** 142,148 **** extern void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset); extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB); - extern int MultiXactMemberFreezeThreshold(void); extern void multixact_twophase_recover(TransactionId xid, uint16 info, void *recdata, uint32 len); --- 139,144 ---- diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h new file mode 100644 index 91ff367..5ea3b41 *** a/src/include/access/rewriteheap.h --- b/src/include/access/rewriteheap.h *************** typedef struct LogicalRewriteMappingData *** 51,57 **** * 6) xid of the xact performing the mapping * --- */ ! #define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x-%x" void CheckPointLogicalRewriteHeap(void); #endif /* REWRITE_HEAP_H */ --- 51,57 ---- * 6) xid of the xact performing the mapping * --- */ ! 
#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x_%x-%x_%x" void CheckPointLogicalRewriteHeap(void); #endif /* REWRITE_HEAP_H */ diff --git a/src/include/access/slru.h b/src/include/access/slru.h new file mode 100644 index d829a6f..53206c0 *** a/src/include/access/slru.h --- b/src/include/access/slru.h *************** *** 30,39 **** * take no explicit notice of that fact in slru.c, except when comparing * segment and page numbers in SimpleLruTruncate (see PagePrecedes()). */ ! #define SLRU_PAGES_PER_SEGMENT 32 /* Maximum length of an SLRU name */ ! #define SLRU_MAX_NAME_LENGTH 32 /* * Page status codes. Note that these do not include the "dirty" bit. --- 30,39 ---- * take no explicit notice of that fact in slru.c, except when comparing * segment and page numbers in SimpleLruTruncate (see PagePrecedes()). */ ! #define SLRU_PAGES_PER_SEGMENT 2048 /* Maximum length of an SLRU name */ ! #define SLRU_MAX_NAME_LENGTH 64 /* * Page status codes. Note that these do not include the "dirty" bit. *************** typedef struct SlruSharedData *** 66,72 **** char **page_buffer; SlruPageStatus *page_status; bool *page_dirty; ! int *page_number; int *page_lru_count; /* --- 66,72 ---- char **page_buffer; SlruPageStatus *page_status; bool *page_dirty; ! int64 *page_number; int *page_lru_count; /* *************** typedef struct SlruSharedData *** 96,102 **** * this is not critical data, since we use it only to avoid swapping out * the latest page. */ ! int latest_page_number; /* LWLocks */ int lwlock_tranche_id; --- 96,102 ---- * this is not critical data, since we use it only to avoid swapping out * the latest page. */ ! int64 latest_page_number; /* LWLocks */ int lwlock_tranche_id; *************** typedef struct SlruCtlData *** 121,133 **** bool do_fsync; /* - * Decide which of two page numbers is "older" for truncation purposes. We - * need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. 
- */ - bool (*PagePrecedes) (int, int); - - /* * Dir is set during SimpleLruInit and does not change thereafter. Since * it's always the same, it doesn't need to be in shared memory. */ --- 121,126 ---- *************** typedef SlruCtlData *SlruCtl; *** 140,164 **** extern Size SimpleLruShmemSize(int nslots, int nlsns); extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, LWLock *ctllock, const char *subdir, int tranche_id); ! extern int SimpleLruZeroPage(SlruCtl ctl, int pageno); ! extern int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok, TransactionId xid); ! extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid); extern void SimpleLruWritePage(SlruCtl ctl, int slotno); extern void SimpleLruFlush(SlruCtl ctl, bool allow_redirtied); ! extern void SimpleLruTruncate(SlruCtl ctl, int cutoffPage); ! extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno); ! typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int segpage, ! void *data); extern bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data); ! extern void SlruDeleteSegment(SlruCtl ctl, int segno); /* SlruScanDirectory public callbacks */ extern bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, ! int segpage, void *data); ! extern bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int segpage, void *data); #endif /* SLRU_H */ --- 133,157 ---- extern Size SimpleLruShmemSize(int nslots, int nlsns); extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, LWLock *ctllock, const char *subdir, int tranche_id); ! extern int SimpleLruZeroPage(SlruCtl ctl, int64 pageno); ! extern int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid); ! extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, TransactionId xid); extern void SimpleLruWritePage(SlruCtl ctl, int slotno); extern void SimpleLruFlush(SlruCtl ctl, bool allow_redirtied); ! 
extern void SimpleLruTruncate(SlruCtl ctl, int64 cutoffPage); ! extern bool SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int64 pageno); ! typedef bool (*SlruScanCallback) (SlruCtl ctl, char *filename, int64 segpage, ! void *data); extern bool SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data); ! extern void SlruDeleteSegment(SlruCtl ctl, int64 segno); /* SlruScanDirectory public callbacks */ extern bool SlruScanDirCbReportPresence(SlruCtl ctl, char *filename, ! int64 segpage, void *data); ! extern bool SlruScanDirCbDeleteAll(SlruCtl ctl, char *filename, int64 segpage, void *data); #endif /* SLRU_H */ diff --git a/src/include/access/transam.h b/src/include/access/transam.h new file mode 100644 index 86076de..3d9db60 *** a/src/include/access/transam.h --- b/src/include/access/transam.h *************** *** 28,38 **** * Note: if you need to change it, you must change pg_class.h as well. * ---------------- */ ! #define InvalidTransactionId ((TransactionId) 0) ! #define BootstrapTransactionId ((TransactionId) 1) ! #define FrozenTransactionId ((TransactionId) 2) ! #define FirstNormalTransactionId ((TransactionId) 3) ! #define MaxTransactionId ((TransactionId) 0xFFFFFFFF) /* ---------------- * transaction ID manipulation macros --- 28,39 ---- * Note: if you need to change it, you must change pg_class.h as well. * ---------------- */ ! #define InvalidTransactionId UINT64CONST(0) ! #define BootstrapTransactionId UINT64CONST(1) ! #define FrozenTransactionId UINT64CONST(2) ! #define FirstNormalTransactionId UINT64CONST(3) ! #define MaxTransactionId UINT64CONST(0xFFFFFFFFFFFFFFFF) ! 
#define MaxShortTransactionId ((TransactionId) 0x7FFFFFFF) /* ---------------- * transaction ID manipulation macros *************** *** 43,72 **** #define TransactionIdEquals(id1, id2) ((id1) == (id2)) #define TransactionIdStore(xid, dest) (*(dest) = (xid)) #define StoreInvalidTransactionId(dest) (*(dest) = InvalidTransactionId) /* advance a transaction ID variable, handling wraparound correctly */ #define TransactionIdAdvance(dest) \ do { \ (dest)++; \ ! if ((dest) < FirstNormalTransactionId) \ ! (dest) = FirstNormalTransactionId; \ } while(0) /* back up a transaction ID variable, handling wraparound correctly */ #define TransactionIdRetreat(dest) \ do { \ (dest)--; \ ! } while ((dest) < FirstNormalTransactionId) /* compare two XIDs already known to be normal; this is a macro for speed */ #define NormalTransactionIdPrecedes(id1, id2) \ (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ ! (int32) ((id1) - (id2)) < 0) /* compare two XIDs already known to be normal; this is a macro for speed */ #define NormalTransactionIdFollows(id1, id2) \ (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ ! (int32) ((id1) - (id2)) > 0) /* ---------- * Object ID (OID) zero is InvalidOid. --- 44,80 ---- #define TransactionIdEquals(id1, id2) ((id1) == (id2)) #define TransactionIdStore(xid, dest) (*(dest) = (xid)) #define StoreInvalidTransactionId(dest) (*(dest) = InvalidTransactionId) + #define ShortTransactionIdToNormal(base, xid) \ + (TransactionIdIsNormal(xid) ? (TransactionId)(xid) + (base) : (TransactionId)(xid)) + #define NormalTransactionIdToShort(base, xid) \ + (TransactionIdIsNormal(xid) ? (ShortTransactionId)( \ + AssertMacro((xid) >= (base) + FirstNormalTransactionId), \ + AssertMacro((xid) <= (base) + MaxShortTransactionId), \ + (xid) - (base)) : (ShortTransactionId)(xid)) /* advance a transaction ID variable, handling wraparound correctly */ #define TransactionIdAdvance(dest) \ do { \ (dest)++; \ ! 
Assert((dest) > FirstNormalTransactionId); \ } while(0) /* back up a transaction ID variable, handling wraparound correctly */ #define TransactionIdRetreat(dest) \ do { \ + Assert((dest) > FirstNormalTransactionId); \ (dest)--; \ ! } while(0) /* compare two XIDs already known to be normal; this is a macro for speed */ #define NormalTransactionIdPrecedes(id1, id2) \ (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ ! (int64) ((id1) - (id2)) < 0) /* compare two XIDs already known to be normal; this is a macro for speed */ #define NormalTransactionIdFollows(id1, id2) \ (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ ! (int64) ((id1) - (id2)) > 0) /* ---------- * Object ID (OID) zero is InvalidOid. *************** typedef struct VariableCacheData *** 118,126 **** TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */ TransactionId xidVacLimit; /* start forcing autovacuums here */ - TransactionId xidWarnLimit; /* start complaining here */ - TransactionId xidStopLimit; /* refuse to advance nextXid beyond here */ - TransactionId xidWrapLimit; /* where the world ends */ Oid oldestXidDB; /* database with minimum datfrozenxid */ /* --- 126,131 ---- *************** extern void TransactionIdAbort(Transacti *** 166,175 **** extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids); extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn); extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids); - extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2); - extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2); - extern bool TransactionIdFollows(TransactionId id1, TransactionId id2); - extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2); extern TransactionId TransactionIdLatest(TransactionId mainxid, int nxids, const TransactionId *xids); extern 
XLogRecPtr TransactionIdGetCommitLSN(TransactionId xid); --- 171,176 ---- diff --git a/src/include/access/xact.h b/src/include/access/xact.h new file mode 100644 index f2c10f9..5f3cf56 *** a/src/include/access/xact.h --- b/src/include/access/xact.h *************** typedef void (*SubXactCallback) (SubXact *** 151,159 **** */ #define XACT_XINFO_HAS_DBINFO (1U << 0) #define XACT_XINFO_HAS_SUBXACTS (1U << 1) ! #define XACT_XINFO_HAS_RELFILENODES (1U << 2) ! #define XACT_XINFO_HAS_INVALS (1U << 3) ! #define XACT_XINFO_HAS_TWOPHASE (1U << 4) #define XACT_XINFO_HAS_ORIGIN (1U << 5) #define XACT_XINFO_HAS_AE_LOCKS (1U << 6) --- 151,159 ---- */ #define XACT_XINFO_HAS_DBINFO (1U << 0) #define XACT_XINFO_HAS_SUBXACTS (1U << 1) ! #define XACT_XINFO_HAS_TWOPHASE (1U << 2) ! #define XACT_XINFO_HAS_RELFILENODES (1U << 3) ! #define XACT_XINFO_HAS_INVALS (1U << 4) #define XACT_XINFO_HAS_ORIGIN (1U << 5) #define XACT_XINFO_HAS_AE_LOCKS (1U << 6) *************** typedef struct xl_xact_xinfo *** 210,216 **** * four so following records don't have to care about alignment. Commit * records can be large, so copying large portions isn't attractive. */ ! uint32 xinfo; } xl_xact_xinfo; typedef struct xl_xact_dbinfo --- 210,216 ---- * four so following records don't have to care about alignment. Commit * records can be large, so copying large portions isn't attractive. */ ! 
uint64 xinfo; } xl_xact_xinfo; typedef struct xl_xact_dbinfo *************** typedef struct xl_xact_subxacts *** 226,231 **** --- 226,236 ---- } xl_xact_subxacts; #define MinSizeOfXactSubxacts offsetof(xl_xact_subxacts, subxacts) + typedef struct xl_xact_twophase + { + TransactionId xid; + } xl_xact_twophase; + typedef struct xl_xact_relfilenodes { int nrels; /* number of subtransaction XIDs */ *************** typedef struct xl_xact_invals *** 240,250 **** } xl_xact_invals; #define MinSizeOfXactInvals offsetof(xl_xact_invals, msgs) - typedef struct xl_xact_twophase - { - TransactionId xid; - } xl_xact_twophase; - typedef struct xl_xact_origin { XLogRecPtr origin_lsn; --- 245,250 ---- *************** typedef struct xl_xact_commit *** 258,266 **** /* xl_xact_xinfo follows if XLOG_XACT_HAS_INFO */ /* xl_xact_dbinfo follows if XINFO_HAS_DBINFO */ /* xl_xact_subxacts follows if XINFO_HAS_SUBXACT */ /* xl_xact_relfilenodes follows if XINFO_HAS_RELFILENODES */ /* xl_xact_invals follows if XINFO_HAS_INVALS */ - /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ /* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! */ } xl_xact_commit; #define MinSizeOfXactCommit (offsetof(xl_xact_commit, xact_time) + sizeof(TimestampTz)) --- 258,266 ---- /* xl_xact_xinfo follows if XLOG_XACT_HAS_INFO */ /* xl_xact_dbinfo follows if XINFO_HAS_DBINFO */ /* xl_xact_subxacts follows if XINFO_HAS_SUBXACT */ + /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ /* xl_xact_relfilenodes follows if XINFO_HAS_RELFILENODES */ /* xl_xact_invals follows if XINFO_HAS_INVALS */ /* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! 
*/ } xl_xact_commit; #define MinSizeOfXactCommit (offsetof(xl_xact_commit, xact_time) + sizeof(TimestampTz)) *************** typedef struct xl_xact_abort *** 272,280 **** /* xl_xact_xinfo follows if XLOG_XACT_HAS_INFO */ /* No db_info required */ /* xl_xact_subxacts follows if HAS_SUBXACT */ /* xl_xact_relfilenodes follows if HAS_RELFILENODES */ /* No invalidation messages needed. */ - /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ } xl_xact_abort; #define MinSizeOfXactAbort sizeof(xl_xact_abort) --- 272,280 ---- /* xl_xact_xinfo follows if XLOG_XACT_HAS_INFO */ /* No db_info required */ /* xl_xact_subxacts follows if HAS_SUBXACT */ + /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ /* xl_xact_relfilenodes follows if HAS_RELFILENODES */ /* No invalidation messages needed. */ } xl_xact_abort; #define MinSizeOfXactAbort sizeof(xl_xact_abort) diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h new file mode 100644 index 66bfb77..e9ad3d6 *** a/src/include/access/xlog.h --- b/src/include/access/xlog.h *************** extern XLogRecPtr GetRedoRecPtr(void); *** 274,280 **** extern XLogRecPtr GetInsertRecPtr(void); extern XLogRecPtr GetFlushRecPtr(void); extern XLogRecPtr GetLastImportantRecPtr(void); - extern void GetNextXidAndEpoch(TransactionId *xid, uint32 *epoch); extern void RemovePromoteSignalFiles(void); extern bool CheckPromoteSignal(void); --- 274,279 ---- diff --git a/src/include/c.h b/src/include/c.h new file mode 100644 index 630dfbf..3a3eef5 *** a/src/include/c.h --- b/src/include/c.h *************** typedef double float8; *** 390,408 **** typedef Oid regproc; typedef regproc RegProcedure; ! typedef uint32 TransactionId; ! typedef uint32 LocalTransactionId; ! typedef uint32 SubTransactionId; #define InvalidSubTransactionId ((SubTransactionId) 0) #define TopSubTransactionId ((SubTransactionId) 1) /* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */ typedef TransactionId MultiXactId; ! 
typedef uint32 MultiXactOffset; typedef uint32 CommandId; --- 390,433 ---- typedef Oid regproc; typedef regproc RegProcedure; ! /* Macro for checking XID 64-bitness */ ! #define XID_IS_64BIT ! #define MAX_START_XID UINT64CONST(0x3fffffffffffffff) ! typedef uint64 TransactionId; ! ! #define TransactionIdPrecedes(id1, id2) ((id1) < (id2)) ! #define TransactionIdPrecedesOrEquals(id1, id2) ((id1) <= (id2)) ! #define TransactionIdFollows(id1, id2) ((id1) > (id2)) ! #define TransactionIdFollowsOrEquals(id1, id2) ((id1) >= (id2)) ! ! #define StartTransactionIdIsValid(start_xid) ((start_xid) <= MAX_START_XID) ! ! typedef uint32 ShortTransactionId; ! ! typedef uint64 LocalTransactionId; ! ! typedef uint64 SubTransactionId; #define InvalidSubTransactionId ((SubTransactionId) 0) #define TopSubTransactionId ((SubTransactionId) 1) + #define XID_FMT UINT64_FORMAT + /* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */ typedef TransactionId MultiXactId; ! #define MultiXactIdPrecedes(id1, id2) ((id1) < (id2)) ! #define MultiXactIdPrecedesOrEquals(id1, id2) ((id1) <= (id2)) ! #define MultiXactIdFollows(id1, id2) ((id1) > (id2)) ! #define MultiXactIdFollowsOrEquals(id1, id2) ((id1) >= (id2)) ! ! #define StartMultiXactIdIsValid(start_mx_id) ((start_mx_id) <= MAX_START_XID) ! ! typedef uint64 MultiXactOffset; ! ! 
#define StartMultiXactOffsetIsValid(start_mx_offset) ((start_mx_offset) <= MAX_START_XID) typedef uint32 CommandId; diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h new file mode 100644 index 1ec03ca..3f2d213 *** a/src/include/catalog/pg_control.h --- b/src/include/catalog/pg_control.h *************** typedef struct CheckPoint *** 39,45 **** TimeLineID PrevTimeLineID; /* previous TLI, if this record begins a new * timeline (equals ThisTimeLineID otherwise) */ bool fullPageWrites; /* current full_page_writes */ - uint32 nextXidEpoch; /* higher-order bits of nextXid */ TransactionId nextXid; /* next free XID */ Oid nextOid; /* next free OID */ MultiXactId nextMulti; /* next free MultiXactId */ --- 39,44 ---- diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h new file mode 100644 index ff9b470..7cb8981 *** a/src/include/catalog/pg_operator.h --- b/src/include/catalog/pg_operator.h *************** DESCR("concatenate"); *** 144,154 **** DATA(insert OID = 352 ( "=" PGNSP PGUID b f t 28 28 16 352 3315 xideq eqsel eqjoinsel )); DESCR("equal"); ! DATA(insert OID = 353 ( "=" PGNSP PGUID b f f 28 23 16 0 3316 xideqint4 eqsel eqjoinsel )); DESCR("equal"); DATA(insert OID = 3315 ( "<>" PGNSP PGUID b f f 28 28 16 3315 352 xidneq neqsel neqjoinsel )); DESCR("not equal"); ! DATA(insert OID = 3316 ( "<>" PGNSP PGUID b f f 28 23 16 0 353 xidneqint4 neqsel neqjoinsel )); DESCR("not equal"); DATA(insert OID = 388 ( "!" PGNSP PGUID r f f 20 0 1700 0 0 numeric_fac - - )); DESCR("factorial"); --- 144,154 ---- DATA(insert OID = 352 ( "=" PGNSP PGUID b f t 28 28 16 352 3315 xideq eqsel eqjoinsel )); DESCR("equal"); ! DATA(insert OID = 353 ( "=" PGNSP PGUID b f f 28 20 16 0 3316 xideqint8 eqsel eqjoinsel )); DESCR("equal"); DATA(insert OID = 3315 ( "<>" PGNSP PGUID b f f 28 28 16 3315 352 xidneq neqsel neqjoinsel )); DESCR("not equal"); ! 
DATA(insert OID = 3316 ( "<>" PGNSP PGUID b f f 28 20 16 0 353 xidneqint8 neqsel neqjoinsel )); DESCR("not equal"); DATA(insert OID = 388 ( "!" PGNSP PGUID r f f 20 0 1700 0 0 numeric_fac - - )); DESCR("factorial"); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h new file mode 100644 index f73c6c6..668e24c *** a/src/include/catalog/pg_proc.h --- b/src/include/catalog/pg_proc.h *************** DESCR("length"); *** 1482,1489 **** DATA(insert OID = 1318 ( length PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 23 "1042" _null_ _null_ _null_ _null_ _null_ bpcharlen _null_ _null_ _null_ )); DESCR("character length"); ! DATA(insert OID = 1319 ( xideqint4 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "28 23" _null_ _null_ _null_ _null_ _null_ xideq _null_ _null_ _null_ )); ! DATA(insert OID = 3309 ( xidneqint4 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "28 23" _null_ _null_ _null_ _null_ _null_ xidneq _null_ _null_ _null_ )); DATA(insert OID = 1326 ( interval_div PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1186 "1186 701" _null_ _null_ _null_ _null_ _null_ interval_div _null_ _null_ _null_ )); --- 1482,1489 ---- DATA(insert OID = 1318 ( length PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 23 "1042" _null_ _null_ _null_ _null_ _null_ bpcharlen _null_ _null_ _null_ )); DESCR("character length"); ! DATA(insert OID = 1319 ( xideqint8 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "28 20" _null_ _null_ _null_ _null_ _null_ xideq _null_ _null_ _null_ )); ! 
DATA(insert OID = 3309 ( xidneqint8 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 2 0 16 "28 20" _null_ _null_ _null_ _null_ _null_ xidneq _null_ _null_ _null_ )); DATA(insert OID = 1326 ( interval_div PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1186 "1186 701" _null_ _null_ _null_ _null_ _null_ interval_div _null_ _null_ _null_ )); diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h new file mode 100644 index ffdb452..548e3ec *** a/src/include/catalog/pg_type.h --- b/src/include/catalog/pg_type.h *************** DATA(insert OID = 27 ( tid PGNSP PGU *** 331,337 **** DESCR("(block, offset), physical location of tuple"); #define TIDOID 27 ! DATA(insert OID = 28 ( xid PGNSP PGUID 4 t b U f t \054 0 0 1011 xidin xidout xidrecv xidsend - - - i p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("transaction id"); #define XIDOID 28 --- 331,337 ---- DESCR("(block, offset), physical location of tuple"); #define TIDOID 27 ! DATA(insert OID = 28 ( xid PGNSP PGUID 8 FLOAT8PASSBYVAL b U f t \054 0 0 1011 xidin xidout xidrecv xidsend - - - d p f 0 -1 0 0 _null_ _null_ _null_ )); DESCR("transaction id"); #define XIDOID 28 *************** DATA(insert OID = 1009 ( _text PGNSP *** 471,477 **** DATA(insert OID = 1028 ( _oid PGNSP PGUID -1 f b A f t \054 0 26 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); #define OIDARRAYOID 1028 DATA(insert OID = 1010 ( _tid PGNSP PGUID -1 f b A f t \054 0 27 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! 
DATA(insert OID = 1011 ( _xid PGNSP PGUID -1 f b A f t \054 0 28 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1012 ( _cid PGNSP PGUID -1 f b A f t \054 0 29 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1013 ( _oidvector PGNSP PGUID -1 f b A f t \054 0 30 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1014 ( _bpchar PGNSP PGUID -1 f b A f t \054 0 1042 0 array_in array_out array_recv array_send bpchartypmodin bpchartypmodout array_typanalyze i x f 0 -1 0 100 _null_ _null_ _null_ )); --- 471,477 ---- DATA(insert OID = 1028 ( _oid PGNSP PGUID -1 f b A f t \054 0 26 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); #define OIDARRAYOID 1028 DATA(insert OID = 1010 ( _tid PGNSP PGUID -1 f b A f t \054 0 27 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); ! 
DATA(insert OID = 1011 ( _xid PGNSP PGUID -1 f b A f t \054 0 28 0 array_in array_out array_recv array_send - - array_typanalyze d x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1012 ( _cid PGNSP PGUID -1 f b A f t \054 0 29 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1013 ( _oidvector PGNSP PGUID -1 f b A f t \054 0 30 0 array_in array_out array_recv array_send - - array_typanalyze i x f 0 -1 0 0 _null_ _null_ _null_ )); DATA(insert OID = 1014 ( _bpchar PGNSP PGUID -1 f b A f t \054 0 1042 0 array_in array_out array_recv array_send bpchartypmodin bpchartypmodout array_typanalyze i x f 0 -1 0 100 _null_ _null_ _null_ )); diff --git a/src/include/commands/sequence.h b/src/include/commands/sequence.h new file mode 100644 index caab195..34f876f *** a/src/include/commands/sequence.h --- b/src/include/commands/sequence.h *************** typedef FormData_pg_sequence_data *Form_ *** 45,50 **** --- 45,55 ---- /* XLOG stuff */ #define XLOG_SEQ_LOG 0x00 + /* + * The "special area" of a sequence's buffer page looks like this. + */ + #define SEQ_MAGIC 0x1717 + typedef struct xl_seq_rec { RelFileNode node; diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h new file mode 100644 index a903511..54f8f2c *** a/src/include/commands/vacuum.h --- b/src/include/commands/vacuum.h *************** typedef struct VacAttrStats *** 135,145 **** */ typedef struct VacuumParams { ! int freeze_min_age; /* min freeze age, -1 to use default */ ! int freeze_table_age; /* age at which to scan whole table */ ! int multixact_freeze_min_age; /* min multixact freeze age, -1 to * use default */ ! int multixact_freeze_table_age; /* multixact age at which to scan * whole table */ bool is_wraparound; /* force a for-wraparound vacuum */ int log_min_duration; /* minimum execution threshold in ms at --- 135,145 ---- */ typedef struct VacuumParams { ! 
int64 freeze_min_age; /* min freeze age, -1 to use default */ ! int64 freeze_table_age; /* age at which to scan whole table */ ! int64 multixact_freeze_min_age; /* min multixact freeze age, -1 to * use default */ ! int64 multixact_freeze_table_age; /* multixact age at which to scan * whole table */ bool is_wraparound; /* force a for-wraparound vacuum */ int log_min_duration; /* minimum execution threshold in ms at *************** typedef struct VacuumParams *** 149,158 **** /* GUC parameters */ extern PGDLLIMPORT int default_statistics_target; /* PGDLLIMPORT for PostGIS */ ! extern int vacuum_freeze_min_age; ! extern int vacuum_freeze_table_age; ! extern int vacuum_multixact_freeze_min_age; ! extern int vacuum_multixact_freeze_table_age; /* in commands/vacuum.c */ --- 149,158 ---- /* GUC parameters */ extern PGDLLIMPORT int default_statistics_target; /* PGDLLIMPORT for PostGIS */ ! extern int64 vacuum_freeze_min_age; ! extern int64 vacuum_freeze_table_age; ! extern int64 vacuum_multixact_freeze_min_age; ! extern int64 vacuum_multixact_freeze_table_age; /* in commands/vacuum.c */ *************** extern void vac_update_relstats(Relation *** 176,184 **** MultiXactId minmulti, bool in_outer_xact); extern void vacuum_set_xid_limits(Relation rel, ! int freeze_min_age, int freeze_table_age, ! int multixact_freeze_min_age, ! int multixact_freeze_table_age, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, --- 176,184 ---- MultiXactId minmulti, bool in_outer_xact); extern void vacuum_set_xid_limits(Relation rel, ! int64 freeze_min_age, int64 freeze_table_age, ! int64 multixact_freeze_min_age, ! 
int64 multixact_freeze_table_age, TransactionId *oldestXmin, TransactionId *freezeLimit, TransactionId *xidFullScanLimit, diff --git a/src/include/fmgr.h b/src/include/fmgr.h new file mode 100644 index b604a5c..7c26bbe *** a/src/include/fmgr.h --- b/src/include/fmgr.h *************** extern struct varlena *pg_detoast_datum_ *** 245,250 **** --- 245,251 ---- #define PG_GETARG_FLOAT4(n) DatumGetFloat4(PG_GETARG_DATUM(n)) #define PG_GETARG_FLOAT8(n) DatumGetFloat8(PG_GETARG_DATUM(n)) #define PG_GETARG_INT64(n) DatumGetInt64(PG_GETARG_DATUM(n)) + #define PG_GETARG_TRANSACTIONID(n) DatumGetTransactionId(PG_GETARG_DATUM(n)) /* use this if you want the raw, possibly-toasted input datum: */ #define PG_GETARG_RAW_VARLENA_P(n) ((struct varlena *) PG_GETARG_POINTER(n)) /* use this if you want the input datum de-toasted: */ *************** extern struct varlena *pg_detoast_datum_ *** 325,330 **** --- 326,332 ---- #define PG_RETURN_FLOAT4(x) return Float4GetDatum(x) #define PG_RETURN_FLOAT8(x) return Float8GetDatum(x) #define PG_RETURN_INT64(x) return Int64GetDatum(x) + #define PG_RETURN_TRANSACTIONID(x) return TransactionIdGetDatum(x) #define PG_RETURN_UINT64(x) return UInt64GetDatum(x) /* RETURN macros for other pass-by-ref types will typically look like this: */ #define PG_RETURN_BYTEA_P(x) PG_RETURN_POINTER(x) diff --git a/src/include/postgres.h b/src/include/postgres.h new file mode 100644 index 1ca9b60..1486b2f *** a/src/include/postgres.h --- b/src/include/postgres.h *************** typedef Datum *DatumPtr; *** 517,537 **** * Returns transaction identifier value of a datum. */ ! #define DatumGetTransactionId(X) ((TransactionId) GET_4_BYTES(X)) /* * TransactionIdGetDatum * Returns datum representation for a transaction identifier. */ ! #define TransactionIdGetDatum(X) ((Datum) SET_4_BYTES((X))) /* * MultiXactIdGetDatum * Returns datum representation for a multixact identifier. */ ! 
#define MultiXactIdGetDatum(X) ((Datum) SET_4_BYTES((X))) /* * DatumGetCommandId --- 517,537 ---- * Returns transaction identifier value of a datum. */ ! #define DatumGetTransactionId(X) (DatumGetUInt64(X)) /* * TransactionIdGetDatum * Returns datum representation for a transaction identifier. */ ! #define TransactionIdGetDatum(X) (UInt64GetDatum(X)) /* * MultiXactIdGetDatum * Returns datum representation for a multixact identifier. */ ! #define MultiXactIdGetDatum(X) (UInt64GetDatum(X)) /* * DatumGetCommandId diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h new file mode 100644 index 3469915..99f1ecd *** a/src/include/postmaster/autovacuum.h --- b/src/include/postmaster/autovacuum.h *************** extern int autovacuum_vac_thresh; *** 35,42 **** extern double autovacuum_vac_scale; extern int autovacuum_anl_thresh; extern double autovacuum_anl_scale; ! extern int autovacuum_freeze_max_age; ! extern int autovacuum_multixact_freeze_max_age; extern int autovacuum_vac_cost_delay; extern int autovacuum_vac_cost_limit; --- 35,42 ---- extern double autovacuum_vac_scale; extern int autovacuum_anl_thresh; extern double autovacuum_anl_scale; ! extern int64 autovacuum_freeze_max_age; ! extern int64 autovacuum_multixact_freeze_max_age; extern int autovacuum_vac_cost_delay; extern int autovacuum_vac_cost_limit; diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h new file mode 100644 index cafd365..3e1fb91 *** a/src/include/storage/bufpage.h --- b/src/include/storage/bufpage.h *************** typedef struct PageHeaderData *** 156,164 **** --- 156,167 ---- typedef PageHeaderData *PageHeader; + /* * HeapPageSpecialData -- data that stored at the end of each heap page. * + * pd_xid_base - base value for transaction IDs on page + * pd_multi_base - base value for multixact IDs on page * pd_prune_xid - oldest XID among potentially prunable tuples on page. 
* pd_magic - magic number identifies type of page * *************** typedef PageHeaderData *PageHeader; *** 170,177 **** */ typedef struct HeapPageSpecialData { ! TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */ ! uint32 pd_magic; /* magic number identifies type of page */ } HeapPageSpecialData; /* --- 173,182 ---- */ typedef struct HeapPageSpecialData { ! TransactionId pd_xid_base; /* base value for transaction IDs on page */ ! TransactionId pd_multi_base; /* base value for multixact IDs on page */ ! ShortTransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */ ! uint32 pd_magic; /* magic number identifies type of page */ } HeapPageSpecialData; /* *************** typedef HeapPageSpecialData *HeapPageSpe *** 192,197 **** --- 197,212 ---- (HeapPageSpecial) ((Pointer) page + BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData))) \ ) + #define HeapPageSetPruneXid(page, xid) \ + ( \ + HeapPageGetSpecial(page)->pd_prune_xid = NormalTransactionIdToShort(HeapPageGetSpecial(page)->pd_xid_base, (xid)) \ + ) + + #define HeapPageGetPruneXid(page) \ + ( \ + ShortTransactionIdToNormal(HeapPageGetSpecial(page)->pd_xid_base, HeapPageGetSpecial(page)->pd_prune_xid) \ + ) + /* * pd_flags contains the following flag bits. Undefined bits are initialized * to zero and may be used in the future. *************** PageValidateSpecialPointer(Page page) *** 419,436 **** #define PageIsPrunable(page, oldestxmin) \ ( \ AssertMacro(TransactionIdIsNormal(oldestxmin)), \ ! TransactionIdIsValid(HeapPageGetSpecial(page)->pd_prune_xid) && \ ! TransactionIdPrecedes(HeapPageGetSpecial(page)->pd_prune_xid, oldestxmin) \ ) #define PageSetPrunable(page, xid) \ do { \ Assert(TransactionIdIsNormal(xid)); \ ! if (!TransactionIdIsValid(HeapPageGetSpecial(page)->pd_prune_xid) || \ ! TransactionIdPrecedes(xid, HeapPageGetSpecial(page)->pd_prune_xid)) \ ! HeapPageGetSpecial(page)->pd_prune_xid = (xid); \ } while (0) #define PageClearPrunable(page) \ ! 
(HeapPageGetSpecial(page)->pd_prune_xid = InvalidTransactionId) /* ---------------------------------------------------------------- --- 434,451 ---- #define PageIsPrunable(page, oldestxmin) \ ( \ AssertMacro(TransactionIdIsNormal(oldestxmin)), \ ! TransactionIdIsValid(HeapPageGetPruneXid(page)) && \ ! TransactionIdPrecedes(HeapPageGetPruneXid(page), oldestxmin) \ ) #define PageSetPrunable(page, xid) \ do { \ Assert(TransactionIdIsNormal(xid)); \ ! if (!TransactionIdIsValid(HeapPageGetPruneXid(page)) || \ ! TransactionIdPrecedes(xid, HeapPageGetPruneXid(page))) \ ! HeapPageSetPruneXid(page, xid); \ } while (0) #define PageClearPrunable(page) \ ! (HeapPageSetPruneXid(page, InvalidTransactionId)) /* ---------------------------------------------------------------- diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h new file mode 100644 index 765431e..5eb83e7 *** a/src/include/storage/lock.h --- b/src/include/storage/lock.h *************** typedef struct LOCKTAG *** 223,230 **** (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) #define SET_LOCKTAG_TRANSACTION(locktag,xid) \ ! ((locktag).locktag_field1 = (xid), \ ! (locktag).locktag_field2 = 0, \ (locktag).locktag_field3 = 0, \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_TRANSACTION, \ --- 223,230 ---- (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) #define SET_LOCKTAG_TRANSACTION(locktag,xid) \ ! ((locktag).locktag_field1 = (uint32)((xid) & 0xFFFFFFFF), \ ! (locktag).locktag_field2 = (uint32)((xid) >> 32), \ (locktag).locktag_field3 = 0, \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_TRANSACTION, \ *************** typedef struct LOCKTAG *** 232,247 **** #define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \ ((locktag).locktag_field1 = (vxid).backendId, \ ! (locktag).locktag_field2 = (vxid).localTransactionId, \ ! 
(locktag).locktag_field3 = 0, \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) #define SET_LOCKTAG_SPECULATIVE_INSERTION(locktag,xid,token) \ ! ((locktag).locktag_field1 = (xid), \ ! (locktag).locktag_field2 = (token), \ ! (locktag).locktag_field3 = 0, \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_SPECULATIVE_TOKEN, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) --- 232,247 ---- #define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \ ((locktag).locktag_field1 = (vxid).backendId, \ ! (locktag).locktag_field2 = (uint32)((vxid).localTransactionId & 0xFFFFFFFF), \ ! (locktag).locktag_field3 = (uint32)((vxid).localTransactionId >> 32), \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) #define SET_LOCKTAG_SPECULATIVE_INSERTION(locktag,xid,token) \ ! ((locktag).locktag_field1 = (uint32)((xid) & 0xFFFFFFFF), \ ! (locktag).locktag_field2 = (uint32)((xid) >> 32), \ ! (locktag).locktag_field3 = (token), \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_SPECULATIVE_TOKEN, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h new file mode 100644 index f5404b4..6388336 *** a/src/include/storage/standby.h --- b/src/include/storage/standby.h *************** *** 20,26 **** #include "storage/relfilenode.h" /* User-settable GUC parameters */ ! extern int vacuum_defer_cleanup_age; extern int max_standby_archive_delay; extern int max_standby_streaming_delay; --- 20,26 ---- #include "storage/relfilenode.h" /* User-settable GUC parameters */ ! 
extern int64 vacuum_defer_cleanup_age; extern int max_standby_archive_delay; extern int max_standby_streaming_delay; diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h new file mode 100644 index 4bc61e5..e5959a1 *** a/src/include/utils/rel.h --- b/src/include/utils/rel.h *************** typedef struct AutoVacOpts *** 262,273 **** int analyze_threshold; int vacuum_cost_delay; int vacuum_cost_limit; ! int freeze_min_age; ! int freeze_max_age; ! int freeze_table_age; ! int multixact_freeze_min_age; ! int multixact_freeze_max_age; ! int multixact_freeze_table_age; int log_min_duration; float8 vacuum_scale_factor; float8 analyze_scale_factor; --- 262,273 ---- int analyze_threshold; int vacuum_cost_delay; int vacuum_cost_limit; ! int64 freeze_min_age; ! int64 freeze_max_age; ! int64 freeze_table_age; ! int64 multixact_freeze_min_age; ! int64 multixact_freeze_max_age; ! int64 multixact_freeze_table_age; int log_min_duration; float8 vacuum_scale_factor; float8 analyze_scale_factor; diff --git a/src/include/utils/tqual.h b/src/include/utils/tqual.h new file mode 100644 index 9a3b56e..6ca141b *** a/src/include/utils/tqual.h --- b/src/include/utils/tqual.h *************** extern bool HeapTupleIsSurelyDead(HeapTu *** 81,87 **** extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid); ! extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple); /* * To avoid leaking too much knowledge about reorderbuffer implementation --- 81,87 ---- extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid); ! 
extern bool HeapTupleHeaderIsOnlyLocked(HeapTuple tuple); /* * To avoid leaking too much knowledge about reorderbuffer implementation diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c new file mode 100644 index 5a575bd..1bc5d0f *** a/src/pl/plperl/plperl.c --- b/src/pl/plperl/plperl.c *************** validate_plperl_function(plperl_proc_ptr *** 2648,2654 **** * This is needed because CREATE OR REPLACE FUNCTION can modify the * function's pg_proc entry without changing its OID. ************************************************************/ ! uptodate = (prodesc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self)); if (uptodate) --- 2648,2654 ---- * This is needed because CREATE OR REPLACE FUNCTION can modify the * function's pg_proc entry without changing its OID. ************************************************************/ ! uptodate = (prodesc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self)); if (uptodate) *************** compile_plperl_function(Oid fn_oid, bool *** 2771,2777 **** prodesc->proname = pstrdup(NameStr(procStruct->proname)); prodesc->fn_cxt = proc_cxt; prodesc->fn_refcount = 0; ! prodesc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); prodesc->fn_tid = procTup->t_self; prodesc->nargs = procStruct->pronargs; prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo)); --- 2771,2777 ---- prodesc->proname = pstrdup(NameStr(procStruct->proname)); prodesc->fn_cxt = proc_cxt; prodesc->fn_refcount = 0; ! 
prodesc->fn_xmin = HeapTupleGetRawXmin(procTup); prodesc->fn_tid = procTup->t_self; prodesc->nargs = procStruct->pronargs; prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo)); diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c new file mode 100644 index 9931ee0..03f47bd *** a/src/pl/plpgsql/src/pl_comp.c --- b/src/pl/plpgsql/src/pl_comp.c *************** recheck: *** 172,178 **** if (function) { /* We have a compiled function, but is it still valid? */ ! if (function->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && ItemPointerEquals(&function->fn_tid, &procTup->t_self)) function_valid = true; else --- 172,178 ---- if (function) { /* We have a compiled function, but is it still valid? */ ! if (function->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&function->fn_tid, &procTup->t_self)) function_valid = true; else *************** do_compile(FunctionCallInfo fcinfo, *** 348,354 **** function->fn_signature = format_procedure(fcinfo->flinfo->fn_oid); function->fn_oid = fcinfo->flinfo->fn_oid; ! function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); function->fn_tid = procTup->t_self; function->fn_input_collation = fcinfo->fncollation; function->fn_cxt = func_cxt; --- 348,354 ---- function->fn_signature = format_procedure(fcinfo->flinfo->fn_oid); function->fn_oid = fcinfo->flinfo->fn_oid; ! 
function->fn_xmin = HeapTupleGetRawXmin(procTup); function->fn_tid = procTup->t_self; function->fn_input_collation = fcinfo->fncollation; function->fn_cxt = func_cxt; diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c new file mode 100644 index 9716697..7dc1bd9 *** a/src/pl/plpgsql/src/pl_exec.c --- b/src/pl/plpgsql/src/pl_exec.c *************** get_tuple_from_datum(Datum value) *** 6152,6157 **** --- 6152,6158 ---- tmptup.t_len = HeapTupleHeaderGetDatumLength(td); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tmptup); tmptup.t_data = td; /* Build a copy and return it */ *************** exec_move_row_from_datum(PLpgSQL_execsta *** 6208,6213 **** --- 6209,6215 ---- tmptup.t_len = HeapTupleHeaderGetDatumLength(td); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tmptup); tmptup.t_data = td; /* Do the move */ diff --git a/src/pl/plpython/plpy_procedure.c b/src/pl/plpython/plpy_procedure.c new file mode 100644 index 26acc88..2fd831a *** a/src/pl/plpython/plpy_procedure.c --- b/src/pl/plpython/plpy_procedure.c *************** PLy_procedure_create(HeapTuple procTup, *** 184,190 **** proc->proname = pstrdup(NameStr(procStruct->proname)); proc->pyname = pstrdup(procName); ! proc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); proc->fn_tid = procTup->t_self; proc->fn_readonly = (procStruct->provolatile != PROVOLATILE_VOLATILE); proc->is_setof = procStruct->proretset; --- 184,190 ---- proc->proname = pstrdup(NameStr(procStruct->proname)); proc->pyname = pstrdup(procName); ! 
proc->fn_xmin = HeapTupleGetRawXmin(procTup); proc->fn_tid = procTup->t_self; proc->fn_readonly = (procStruct->provolatile != PROVOLATILE_VOLATILE); proc->is_setof = procStruct->proretset; *************** PLy_procedure_argument_valid(PLyTypeInfo *** 455,461 **** elog(ERROR, "cache lookup failed for relation %u", arg->typ_relid); /* If it has changed, the cached data is not valid */ ! valid = (arg->typrel_xmin == HeapTupleHeaderGetRawXmin(relTup->t_data) && ItemPointerEquals(&arg->typrel_tid, &relTup->t_self)); ReleaseSysCache(relTup); --- 455,461 ---- elog(ERROR, "cache lookup failed for relation %u", arg->typ_relid); /* If it has changed, the cached data is not valid */ ! valid = (arg->typrel_xmin == HeapTupleGetRawXmin(relTup) && ItemPointerEquals(&arg->typrel_tid, &relTup->t_self)); ReleaseSysCache(relTup); *************** PLy_procedure_valid(PLyProcedure *proc, *** 476,482 **** return false; /* If the pg_proc tuple has changed, it's not valid */ ! if (!(proc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && ItemPointerEquals(&proc->fn_tid, &procTup->t_self))) return false; --- 476,482 ---- return false; /* If the pg_proc tuple has changed, it's not valid */ ! if (!(proc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&proc->fn_tid, &procTup->t_self))) return false; diff --git a/src/pl/plpython/plpy_typeio.c b/src/pl/plpython/plpy_typeio.c new file mode 100644 index e4af8cc..f3fdd79 *** a/src/pl/plpython/plpy_typeio.c --- b/src/pl/plpython/plpy_typeio.c *************** PLy_input_tuple_funcs(PLyTypeInfo *arg, *** 143,149 **** elog(ERROR, "cache lookup failed for relation %u", arg->typ_relid); /* Remember XMIN and TID for later validation if cache is still OK */ ! arg->typrel_xmin = HeapTupleHeaderGetRawXmin(relTup->t_data); arg->typrel_tid = relTup->t_self; ReleaseSysCache(relTup); --- 143,149 ---- elog(ERROR, "cache lookup failed for relation %u", arg->typ_relid); /* Remember XMIN and TID for later validation if cache is still OK */ ! 
arg->typrel_xmin = HeapTupleGetRawXmin(relTup); arg->typrel_tid = relTup->t_self; ReleaseSysCache(relTup); *************** PLy_output_tuple_funcs(PLyTypeInfo *arg, *** 215,221 **** elog(ERROR, "cache lookup failed for relation %u", arg->typ_relid); /* Remember XMIN and TID for later validation if cache is still OK */ ! arg->typrel_xmin = HeapTupleHeaderGetRawXmin(relTup->t_data); arg->typrel_tid = relTup->t_self; ReleaseSysCache(relTup); --- 215,221 ---- elog(ERROR, "cache lookup failed for relation %u", arg->typ_relid); /* Remember XMIN and TID for later validation if cache is still OK */ ! arg->typrel_xmin = HeapTupleGetRawXmin(relTup); arg->typrel_tid = relTup->t_self; ReleaseSysCache(relTup); diff --git a/src/pl/tcl/pltcl.c b/src/pl/tcl/pltcl.c new file mode 100644 index 09f87ec..8f7aec1 *** a/src/pl/tcl/pltcl.c --- b/src/pl/tcl/pltcl.c *************** compile_pltcl_function(Oid fn_oid, Oid t *** 1397,1403 **** * function's pg_proc entry without changing its OID. ************************************************************/ if (prodesc != NULL && ! prodesc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self)) { /* It's still up-to-date, so we can use it */ --- 1397,1403 ---- * function's pg_proc entry without changing its OID. ************************************************************/ if (prodesc != NULL && ! prodesc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self)) { /* It's still up-to-date, so we can use it */ *************** compile_pltcl_function(Oid fn_oid, Oid t *** 1464,1470 **** prodesc->internal_proname = pstrdup(internal_proname); prodesc->fn_cxt = proc_cxt; prodesc->fn_refcount = 0; ! 
prodesc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); prodesc->fn_tid = procTup->t_self; prodesc->nargs = procStruct->pronargs; prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo)); --- 1464,1470 ---- prodesc->internal_proname = pstrdup(internal_proname); prodesc->fn_cxt = proc_cxt; prodesc->fn_refcount = 0; ! prodesc->fn_xmin = HeapTupleGetRawXmin(procTup); prodesc->fn_tid = procTup->t_self; prodesc->nargs = procStruct->pronargs; prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo)); diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out new file mode 100644 index 0478a8a..487666e *** a/src/test/regress/expected/alter_table.out --- b/src/test/regress/expected/alter_table.out *************** from pg_locks l join pg_class c on l.rel *** 2204,2210 **** where virtualtransaction = ( select virtualtransaction from pg_locks ! where transactionid = txid_current()::integer) and locktype = 'relation' and relnamespace != (select oid from pg_namespace where nspname = 'pg_catalog') and c.relname != 'my_locks' --- 2204,2210 ---- where virtualtransaction = ( select virtualtransaction from pg_locks ! where transactionid = txid_current()) and locktype = 'relation' and relnamespace != (select oid from pg_namespace where nspname = 'pg_catalog') and c.relname != 'my_locks' *************** from pg_locks l join pg_class c on l.rel *** 2367,2373 **** where virtualtransaction = ( select virtualtransaction from pg_locks ! where transactionid = txid_current()::integer) and locktype = 'relation' and relnamespace != (select oid from pg_namespace where nspname = 'pg_catalog') and c.relname = 'my_locks' --- 2367,2373 ---- where virtualtransaction = ( select virtualtransaction from pg_locks ! 
where transactionid = txid_current()) and locktype = 'relation' and relnamespace != (select oid from pg_namespace where nspname = 'pg_catalog') and c.relname = 'my_locks' diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out new file mode 100644 index fcf8bd7..776a328 *** a/src/test/regress/expected/opr_sanity.out --- b/src/test/regress/expected/opr_sanity.out *************** WHERE p1.oid != p2.oid AND *** 223,229 **** ORDER BY 1, 2; proargtypes | proargtypes -------------+------------- ! 23 | 28 1114 | 1184 1560 | 1562 (3 rows) --- 223,229 ---- ORDER BY 1, 2; proargtypes | proargtypes -------------+------------- ! 20 | 28 1114 | 1184 1560 | 1562 (3 rows) *************** interval_gt(interval,interval) *** 641,647 **** charlt("char","char") tidne(tid,tid) tideq(tid,tid) ! xideqint4(xid,integer) timetz_eq(time with time zone,time with time zone) timetz_ne(time with time zone,time with time zone) timetz_lt(time with time zone,time with time zone) --- 641,647 ---- charlt("char","char") tidne(tid,tid) tideq(tid,tid) ! xideqint8(xid,bigint) timetz_eq(time with time zone,time with time zone) timetz_ne(time with time zone,time with time zone) timetz_lt(time with time zone,time with time zone) *************** uuid_ge(uuid,uuid) *** 710,716 **** uuid_gt(uuid,uuid) uuid_ne(uuid,uuid) xidneq(xid,xid) ! xidneqint4(xid,integer) macaddr8_eq(macaddr8,macaddr8) macaddr8_lt(macaddr8,macaddr8) macaddr8_le(macaddr8,macaddr8) --- 710,716 ---- uuid_gt(uuid,uuid) uuid_ne(uuid,uuid) xidneq(xid,xid) ! 
xidneqint8(xid,bigint) macaddr8_eq(macaddr8,macaddr8) macaddr8_lt(macaddr8,macaddr8) macaddr8_le(macaddr8,macaddr8) diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c new file mode 100644 index 734947c..a83133e *** a/src/test/regress/regress.c --- b/src/test/regress/regress.c *************** make_tuple_indirect(PG_FUNCTION_ARGS) *** 752,757 **** --- 752,758 ---- tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple); tuple.t_data = rec; values = (Datum *) palloc(ncolumns * sizeof(Datum)); diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql new file mode 100644 index 37cca72..2a4ccf3 *** a/src/test/regress/sql/alter_table.sql --- b/src/test/regress/sql/alter_table.sql *************** from pg_locks l join pg_class c on l.rel *** 1441,1447 **** where virtualtransaction = ( select virtualtransaction from pg_locks ! where transactionid = txid_current()::integer) and locktype = 'relation' and relnamespace != (select oid from pg_namespace where nspname = 'pg_catalog') and c.relname != 'my_locks' --- 1441,1447 ---- where virtualtransaction = ( select virtualtransaction from pg_locks ! where transactionid = txid_current()) and locktype = 'relation' and relnamespace != (select oid from pg_namespace where nspname = 'pg_catalog') and c.relname != 'my_locks' *************** from pg_locks l join pg_class c on l.rel *** 1528,1534 **** where virtualtransaction = ( select virtualtransaction from pg_locks ! where transactionid = txid_current()::integer) and locktype = 'relation' and relnamespace != (select oid from pg_namespace where nspname = 'pg_catalog') and c.relname = 'my_locks' --- 1528,1534 ---- where virtualtransaction = ( select virtualtransaction from pg_locks ! 
where transactionid = txid_current()) and locktype = 'relation' and relnamespace != (select oid from pg_namespace where nspname = 'pg_catalog') and c.relname = 'my_locks'