From 6a9c69a3690e5e503417928d64547e1a920c193e Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Tue, 7 Jun 2022 15:48:12 +0400 Subject: [PATCH v38 8/8] Use 64-bit XIDs - change TransactionId to 64bit - disk tuple format (HeapTupleHeader) is (almost) unchanged: xmin and xmax remains 32bit -- now 32bit xid is named ShortTransactionId - heap page format is changed to contain xid and multixact base value, tuple's xmin and xmax are offsets from. -- xid_base and multi_base are stored as a page special data. PageHeader remains unmodified. - in-memory tuple (HeapTuple) is enriched with 64bit xmin/xmax precalculated from 32-bit xmin/xmax (in tuple header) and 64-bit xid_base/multi_base stored in PageSpecial. Authors: - Alexander Korotkov - Teodor Sigaev - Nikita Glukhov - Maxim Orlov - Pavel Borisov - Yura Sokolov - Aleksander Alekseev Discussion: https://postgr.es/m/CACG%3DezZe1NQSCnfHOr78AtAZxJZeCvxrts0ygrxYwe%3DpyyjVWA%40mail.gmail.com Discussion: https://postgr.es/m/CAJ7c6TPDOYBYrnCAeyndkBktO0WG2xSdYduTF0nxq%2BvfkmTF5Q%40mail.gmail.com --- contrib/amcheck/verify_heapam.c | 77 +- contrib/amcheck/verify_nbtree.c | 2 +- contrib/hstore/hstore_io.c | 2 + contrib/pageinspect/Makefile | 3 +- contrib/pageinspect/btreefuncs.c | 22 +- contrib/pageinspect/expected/btree.out | 4 +- contrib/pageinspect/expected/hash_1.out | 166 +++ .../pageinspect/expected/oldextversions.out | 10 +- contrib/pageinspect/expected/page.out | 28 +- contrib/pageinspect/heapfuncs.c | 9 +- .../pageinspect/pageinspect--1.10--1.11.sql | 145 +++ contrib/pageinspect/pageinspect--1.5.sql | 2 + contrib/pageinspect/pageinspect.control | 2 +- contrib/pageinspect/rawpage.c | 25 +- contrib/pageinspect/sql/btree.sql | 3 +- contrib/pg_surgery/heap_surgery.c | 12 +- .../pg_visibility/expected/pg_visibility.out | 17 + contrib/pg_visibility/pg_visibility.c | 5 +- contrib/pg_visibility/sql/pg_visibility.sql | 18 + contrib/pgrowlocks/pgrowlocks.c | 2 +- contrib/pgstattuple/pgstatapprox.c | 1 + 
contrib/pgstattuple/pgstatindex.c | 2 +- .../postgres_fdw/expected/postgres_fdw.out | 55 +- contrib/postgres_fdw/postgres_fdw.c | 9 +- contrib/postgres_fdw/sql/postgres_fdw.sql | 15 +- src/backend/access/common/heaptuple.c | 8 +- src/backend/access/common/reloptions.c | 118 +- src/backend/access/hash/hashvalidate.c | 5 +- src/backend/access/heap/heapam.c | 1004 ++++++++++++++--- src/backend/access/heap/heapam_handler.c | 28 +- src/backend/access/heap/heapam_visibility.c | 173 +-- src/backend/access/heap/heaptoast.c | 3 + src/backend/access/heap/hio.c | 19 +- src/backend/access/heap/pruneheap.c | 60 +- src/backend/access/heap/rewriteheap.c | 85 +- src/backend/access/heap/vacuumlazy.c | 140 +-- src/backend/access/nbtree/nbtpage.c | 2 + src/backend/access/nbtree/nbtsplitloc.c | 16 +- src/backend/access/nbtree/nbtxlog.c | 2 + src/backend/access/rmgrdesc/gistdesc.c | 4 +- src/backend/access/rmgrdesc/heapdesc.c | 32 + src/backend/access/rmgrdesc/mxactdesc.c | 9 +- src/backend/access/rmgrdesc/nbtdesc.c | 4 +- src/backend/access/rmgrdesc/xactdesc.c | 6 +- src/backend/access/rmgrdesc/xlogdesc.c | 6 +- src/backend/access/transam/clog.c | 22 +- src/backend/access/transam/commit_ts.c | 19 - src/backend/access/transam/multixact.c | 686 +---------- src/backend/access/transam/slru.c | 13 +- src/backend/access/transam/subtrans.c | 9 +- src/backend/access/transam/transam.c | 18 +- src/backend/access/transam/twophase.c | 9 +- src/backend/access/transam/varsup.c | 153 +-- src/backend/access/transam/xact.c | 35 +- src/backend/access/transam/xlog.c | 10 +- src/backend/access/transam/xloginsert.c | 7 + src/backend/access/transam/xlogreader.c | 34 - src/backend/access/transam/xlogrecovery.c | 2 +- src/backend/bootstrap/bootstrap.c | 26 +- src/backend/catalog/heap.c | 8 +- src/backend/catalog/pg_inherits.c | 2 +- src/backend/commands/async.c | 2 +- src/backend/commands/dbcommands.c | 1 + src/backend/commands/sequence.c | 22 +- src/backend/commands/vacuum.c | 97 +- 
src/backend/executor/execExprInterp.c | 1 + src/backend/executor/execUtils.c | 1 + src/backend/executor/nodeModifyTable.c | 1 + src/backend/executor/spi.c | 1 + src/backend/nodes/copyfuncs.c | 1 + src/backend/nodes/equalfuncs.c | 8 + src/backend/nodes/list.c | 20 + src/backend/nodes/outfuncs.c | 10 +- src/backend/optimizer/util/plancat.c | 2 +- src/backend/postmaster/autovacuum.c | 72 +- src/backend/replication/logical/decode.c | 18 +- src/backend/replication/logical/proto.c | 50 +- .../replication/logical/reorderbuffer.c | 17 +- src/backend/replication/logical/worker.c | 2 +- src/backend/replication/pgoutput/pgoutput.c | 7 +- src/backend/replication/walreceiver.c | 28 +- src/backend/replication/walsender.c | 63 +- src/backend/statistics/extended_stats.c | 1 + src/backend/storage/buffer/Makefile | 3 +- src/backend/storage/buffer/bufmgr.c | 81 +- src/backend/storage/buffer/heap_convert.c | 516 +++++++++ src/backend/storage/ipc/procarray.c | 100 +- src/backend/storage/ipc/standby.c | 6 +- src/backend/storage/lmgr/lmgr.c | 16 +- src/backend/storage/lmgr/predicate.c | 4 +- src/backend/storage/page/bufpage.c | 209 +++- src/backend/tcop/postgres.c | 25 +- src/backend/utils/adt/enum.c | 2 +- src/backend/utils/adt/jsonfuncs.c | 1 + src/backend/utils/adt/lockfuncs.c | 9 +- src/backend/utils/adt/pgstatfuncs.c | 1 + src/backend/utils/adt/rowtypes.c | 12 + src/backend/utils/adt/xid.c | 37 +- src/backend/utils/adt/xid8funcs.c | 83 +- src/backend/utils/cache/catcache.c | 1 + src/backend/utils/cache/relcache.c | 3 +- src/backend/utils/fmgr/fmgr.c | 4 +- src/backend/utils/misc/guc.c | 184 +-- src/backend/utils/misc/help_config.c | 8 +- src/backend/utils/misc/pg_controldata.c | 2 +- src/backend/utils/misc/postgresql.conf.sample | 4 +- src/backend/utils/sort/tuplesort.c | 8 +- src/backend/utils/time/combocid.c | 18 +- src/backend/utils/time/snapmgr.c | 9 +- src/bin/initdb/initdb.c | 60 +- src/bin/initdb/t/001_initdb.pl | 12 +- src/bin/pg_amcheck/t/004_verify_heapam.pl | 180 ++- 
src/bin/pg_controldata/pg_controldata.c | 2 +- src/bin/pg_dump/pg_dump.c | 20 +- src/bin/pg_dump/pg_dump.h | 8 +- src/bin/pg_resetwal/pg_resetwal.c | 56 +- src/bin/pg_upgrade/Makefile | 1 + src/bin/pg_upgrade/check.c | 114 ++ src/bin/pg_upgrade/controldata.c | 17 +- src/bin/pg_upgrade/file.c | 168 ++- src/bin/pg_upgrade/pg_upgrade.c | 159 ++- src/bin/pg_upgrade/pg_upgrade.h | 23 +- src/bin/pg_upgrade/segresize.c | 586 ++++++++++ src/bin/pg_upgrade/t/002_pg_upgrade.pl | 22 +- src/bin/pg_upgrade/version.c | 104 +- src/bin/pg_verifybackup/t/003_corruption.pl | 2 +- src/bin/pg_waldump/pg_waldump.c | 2 +- src/include/access/clog.h | 2 +- src/include/access/ginblock.h | 11 +- src/include/access/gist.h | 2 +- src/include/access/heapam.h | 17 +- src/include/access/heapam_xlog.h | 22 +- src/include/access/heaptoast.h | 2 +- src/include/access/htup.h | 18 +- src/include/access/htup_details.h | 187 ++- src/include/access/multixact.h | 11 +- src/include/access/nbtree.h | 10 + src/include/access/rewriteheap.h | 4 +- src/include/access/rmgrlist.h | 1 + src/include/access/slru.h | 10 +- src/include/access/tableam.h | 2 +- src/include/access/transam.h | 87 +- src/include/access/tupmacs.h | 3 +- src/include/access/xact.h | 13 +- src/include/access/xloginsert.h | 1 + src/include/access/xlogreader.h | 4 - src/include/access/xlogrecord.h | 5 +- src/include/c.h | 27 +- src/include/catalog/pg_amproc.dat | 4 +- src/include/catalog/pg_control.h | 6 + src/include/catalog/pg_operator.dat | 8 +- src/include/catalog/pg_proc.dat | 12 +- src/include/catalog/pg_type.dat | 4 +- src/include/catalog/pg_type.h | 5 + src/include/commands/vacuum.h | 28 +- src/include/fmgr.h | 2 + src/include/nodes/nodes.h | 1 + src/include/nodes/pg_list.h | 4 + src/include/pg_config.h.in | 3 + src/include/postgres.h | 6 +- src/include/postmaster/autovacuum.h | 4 +- src/include/storage/buf_internals.h | 5 +- src/include/storage/bufmgr.h | 4 + src/include/storage/bufpage.h | 129 ++- src/include/storage/itemid.h | 2 + 
src/include/storage/lock.h | 14 +- src/include/storage/standby.h | 2 +- src/include/utils/combocid.h | 2 +- src/include/utils/rel.h | 12 +- src/include/utils/xid8.h | 4 +- src/pl/plperl/plperl.c | 4 +- src/pl/plpgsql/src/pl_comp.c | 4 +- src/pl/plpgsql/src/pl_exec.c | 2 + src/pl/plpython/plpy_procedure.c | 4 +- src/pl/tcl/pltcl.c | 4 +- src/test/Makefile | 3 +- src/test/perl/PostgreSQL/Test/Cluster.pm | 4 +- src/test/recovery/t/003_recovery_targets.pl | 2 +- src/test/regress/expected/indirect_toast.out | 8 + src/test/regress/expected/insert.out | 16 +- src/test/regress/expected/opr_sanity.out | 6 +- src/test/regress/expected/select_views.out | 70 +- src/test/regress/expected/txid.out | 8 +- src/test/regress/expected/type_sanity.out | 5 +- src/test/regress/expected/xid.out | 14 +- src/test/regress/expected/xid64.out | 122 ++ src/test/regress/parallel_schedule | 2 +- src/test/regress/pg_regress.c | 2 +- src/test/regress/regress.c | 356 ++++++ src/test/regress/sql/indirect_toast.sql | 11 + src/test/regress/sql/insert.sql | 17 +- src/test/regress/sql/select_views.sql | 2 +- src/test/regress/sql/type_sanity.sql | 5 +- src/test/regress/sql/xid64.sql | 97 ++ src/test/xid-64/Makefile | 22 + src/test/xid-64/README | 16 + src/test/xid-64/t/001_test_large_xids.pl | 54 + src/test/xid-64/t/002_test_gucs.pl | 79 ++ src/test/xid-64/t/003_test_integrity.pl | 58 + src/test/xid-64/t/004_test_relminmxid.pl | 90 ++ src/test/xid-64/t/005_stream_subxact.pl | 100 ++ src/test/xid-64/t/006_zeropage.pl | 33 + src/tools/msvc/Solution.pm | 1 + src/tools/pgindent/typedefs.list | 4 +- 204 files changed, 6169 insertions(+), 2476 deletions(-) create mode 100644 contrib/pageinspect/expected/hash_1.out create mode 100644 contrib/pageinspect/pageinspect--1.10--1.11.sql create mode 100644 src/backend/storage/buffer/heap_convert.c create mode 100644 src/bin/pg_upgrade/segresize.c create mode 100644 src/test/regress/expected/xid64.out create mode 100644 src/test/regress/sql/xid64.sql create mode 100644 
src/test/xid-64/Makefile create mode 100644 src/test/xid-64/README create mode 100644 src/test/xid-64/t/001_test_large_xids.pl create mode 100644 src/test/xid-64/t/002_test_gucs.pl create mode 100644 src/test/xid-64/t/003_test_integrity.pl create mode 100644 src/test/xid-64/t/004_test_relminmxid.pl create mode 100644 src/test/xid-64/t/005_stream_subxact.pl create mode 100644 src/test/xid-64/t/006_zeropage.pl diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index 823977856a2..fd2b2666ad8 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -85,7 +85,7 @@ typedef struct HeapCheckContext * from them. */ FullTransactionId next_fxid; /* ShmemVariableCache->nextXid */ - TransactionId next_xid; /* 32-bit version of next_fxid */ + TransactionId next_xid; /* 64-bit version of next_fxid */ TransactionId oldest_xid; /* ShmemVariableCache->oldestXid */ FullTransactionId oldest_fxid; /* 64-bit version of oldest_xid, computed * relative to next_fxid */ @@ -126,6 +126,7 @@ typedef struct HeapCheckContext uint16 lp_len; uint16 lp_off; HeapTupleHeader tuphdr; + HeapTupleData tuple; int natts; /* Values for iterating over attributes within the tuple */ @@ -165,8 +166,6 @@ static bool check_tuple_visibility(HeapCheckContext *ctx); static void report_corruption(HeapCheckContext *ctx, char *msg); static void report_toast_corruption(HeapCheckContext *ctx, ToastedAttribute *ta, char *msg); -static FullTransactionId FullTransactionIdFromXidAndCtx(TransactionId xid, - const HeapCheckContext *ctx); static void update_cached_xid_range(HeapCheckContext *ctx); static void update_cached_mxid_range(HeapCheckContext *ctx); static XidBoundsViolation check_mxid_in_range(MultiXactId mxid, @@ -390,7 +389,7 @@ verify_heapam(PG_FUNCTION_ARGS) update_cached_xid_range(&ctx); update_cached_mxid_range(&ctx); ctx.relfrozenxid = ctx.rel->rd_rel->relfrozenxid; - ctx.relfrozenfxid = FullTransactionIdFromXidAndCtx(ctx.relfrozenxid, &ctx); + 
ctx.relfrozenfxid = FullTransactionIdFromXid(ctx.relfrozenxid); ctx.relminmxid = ctx.rel->rd_rel->relminmxid; if (TransactionIdIsNormal(ctx.relfrozenxid)) @@ -505,6 +504,11 @@ verify_heapam(PG_FUNCTION_ARGS) ctx.tuphdr = (HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid); ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr); + ctx.tuple.t_data = ctx.tuphdr; + ctx.tuple.t_len = ItemIdGetLength(ctx.itemid); + ctx.tuple.t_tableOid = RelationGetRelid(ctx.rel); + HeapTupleCopyBaseFromPage(&ctx.tuple, ctx.page); + /* Ok, ready to check this next tuple */ check_tuple(&ctx); } @@ -730,12 +734,13 @@ check_tuple_visibility(HeapCheckContext *ctx) XidCommitStatus xmin_status; XidCommitStatus xvac_status; XidCommitStatus xmax_status; + HeapTuple tuple = &ctx->tuple; HeapTupleHeader tuphdr = ctx->tuphdr; ctx->tuple_could_be_pruned = true; /* have not yet proven otherwise */ /* If xmin is normal, it should be within valid range */ - xmin = HeapTupleHeaderGetXmin(tuphdr); + xmin = HeapTupleGetXmin(tuple); switch (get_xid_status(xmin, ctx, &xmin_status)) { case XID_INVALID: @@ -745,19 +750,19 @@ check_tuple_visibility(HeapCheckContext *ctx) report_corruption(ctx, psprintf("xmin %llu equals or exceeds next valid transaction ID %llu", (unsigned long long) xmin, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return false; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("xmin %llu precedes oldest valid transaction ID %llu", (unsigned long long) xmin, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return false; case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("xmin %llu precedes relation freeze threshold %llu", (unsigned long long) xmin, - (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid))); return false; } 
@@ -783,19 +788,19 @@ check_tuple_visibility(HeapCheckContext *ctx) report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple equals or exceeds next valid transaction ID %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return false; case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple precedes relation freeze threshold %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid))); return false; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple precedes oldest valid transaction ID %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return false; case XID_BOUNDS_OK: break; @@ -849,19 +854,19 @@ check_tuple_visibility(HeapCheckContext *ctx) report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple equals or exceeds next valid transaction ID %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return false; case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple precedes relation freeze threshold %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid))); return false; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple precedes oldest valid 
transaction ID %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return false; case XID_BOUNDS_OK: break; @@ -938,7 +943,7 @@ check_tuple_visibility(HeapCheckContext *ctx) * HEAP_XMAX_IS_LOCKED_ONLY is true, but for now we err on the side of * avoiding possibly-bogus complaints about missing TOAST entries. */ - xmax = HeapTupleHeaderGetRawXmax(tuphdr); + xmax = HeapTupleGetRawXmax(tuple); switch (check_mxid_valid_in_rel(xmax, ctx)) { case XID_INVALID: @@ -997,7 +1002,7 @@ check_tuple_visibility(HeapCheckContext *ctx) * We already checked above that this multixact is within limits for * this table. Now check the update xid from this multixact. */ - xmax = HeapTupleGetUpdateXid(tuphdr); + xmax = HeapTupleGetUpdateXid(tuple); switch (get_xid_status(xmax, ctx, &xmax_status)) { case XID_INVALID: @@ -1009,19 +1014,19 @@ check_tuple_visibility(HeapCheckContext *ctx) report_corruption(ctx, psprintf("update xid %llu equals or exceeds next valid transaction ID %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return true; case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("update xid %llu precedes relation freeze threshold %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid))); return true; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("update xid %llu precedes oldest valid transaction ID %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return true; case XID_BOUNDS_OK: break; @@ -1061,26 +1066,26 @@ check_tuple_visibility(HeapCheckContext *ctx) } /* xmax is an XID, not a MXID. 
Sanity check it. */ - xmax = HeapTupleHeaderGetRawXmax(tuphdr); + xmax = HeapTupleGetRawXmax(tuple); switch (get_xid_status(xmax, ctx, &xmax_status)) { case XID_IN_FUTURE: report_corruption(ctx, psprintf("xmax %llu equals or exceeds next valid transaction ID %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return false; /* corrupt */ case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("xmax %llu precedes relation freeze threshold %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid))); return false; /* corrupt */ case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("xmax %llu precedes oldest valid transaction ID %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return false; /* corrupt */ case XID_BOUNDS_OK: case XID_INVALID: @@ -1563,24 +1568,6 @@ check_tuple(HeapCheckContext *ctx) ctx->attnum = -1; } -/* - * Convert a TransactionId into a FullTransactionId using our cached values of - * the valid transaction ID range. It is the caller's responsibility to have - * already updated the cached values, if necessary. - */ -static FullTransactionId -FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx) -{ - uint32 epoch; - - if (!TransactionIdIsNormal(xid)) - return FullTransactionIdFromEpochAndXid(0, xid); - epoch = EpochFromFullTransactionId(ctx->next_fxid); - if (xid > ctx->next_xid) - epoch--; - return FullTransactionIdFromEpochAndXid(epoch, xid); -} - /* * Update our cached range of valid transaction IDs. 
*/ @@ -1594,7 +1581,7 @@ update_cached_xid_range(HeapCheckContext *ctx) LWLockRelease(XidGenLock); /* And compute alternate versions of the same */ - ctx->oldest_fxid = FullTransactionIdFromXidAndCtx(ctx->oldest_xid, ctx); + ctx->oldest_fxid = FullTransactionIdFromXid(ctx->oldest_xid); ctx->next_xid = XidFromFullTransactionId(ctx->next_fxid); } @@ -1694,7 +1681,7 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx, } /* Check if the xid is within bounds */ - fxid = FullTransactionIdFromXidAndCtx(xid, ctx); + fxid = FullTransactionIdFromXid(xid); if (!fxid_in_cached_range(fxid, ctx)) { /* @@ -1703,7 +1690,6 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx, * performed the full xid conversion, reconvert. */ update_cached_xid_range(ctx); - fxid = FullTransactionIdFromXidAndCtx(xid, ctx); } if (FullTransactionIdPrecedesOrEquals(ctx->next_fxid, fxid)) @@ -1727,8 +1713,7 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx, *status = XID_COMMITTED; LWLockAcquire(XactTruncationLock, LW_SHARED); clog_horizon = - FullTransactionIdFromXidAndCtx(ShmemVariableCache->oldestClogXid, - ctx); + FullTransactionIdFromXid(ShmemVariableCache->oldestClogXid); if (FullTransactionIdPrecedesOrEquals(clog_horizon, fxid)) { if (TransactionIdIsCurrentTransactionId(xid)) diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index 2beeebb1635..67f5208ac31 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -525,7 +525,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, * avoid this. 
*/ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data), + !TransactionIdPrecedes(HeapTupleGetXmin(rel->rd_indextuple), snapshot->xmin)) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c index b3304ff8445..f54741330c5 100644 --- a/contrib/hstore/hstore_io.c +++ b/contrib/hstore/hstore_io.c @@ -859,6 +859,7 @@ hstore_from_record(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; + HeapTupleSetZeroBase(&tuple); values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); @@ -1012,6 +1013,7 @@ hstore_populate_record(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; + HeapTupleSetZeroBase(&tuple); } /* diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile index 5c0736564ab..5ca80c9d766 100644 --- a/contrib/pageinspect/Makefile +++ b/contrib/pageinspect/Makefile @@ -13,7 +13,8 @@ OBJS = \ rawpage.o EXTENSION = pageinspect -DATA = pageinspect--1.9--1.10.sql pageinspect--1.8--1.9.sql \ +DATA = pageinspect--1.10--1.11.sql \ + pageinspect--1.9--1.10.sql pageinspect--1.8--1.9.sql \ pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \ pageinspect--1.5.sql pageinspect--1.5--1.6.sql \ pageinspect--1.4--1.5.sql pageinspect--1.3--1.4.sql \ diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c index 33ef9f2f454..f63151da1cf 100644 --- a/contrib/pageinspect/btreefuncs.c +++ b/contrib/pageinspect/btreefuncs.c @@ -105,9 +105,14 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) stat->page_size = PageGetPageSize(page); + stat->btpo_prev = opaque->btpo_prev; + stat->btpo_level = opaque->btpo_level; + /* page type (flags) */ if (P_ISDELETED(opaque)) { + TransactionId safexid; + /* We divide 
deleted pages into leaf ('d') or internal ('D') */ if (P_ISLEAF(opaque) || !P_HAS_FULLXID(opaque)) stat->type = 'd'; @@ -122,15 +127,16 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) * called "bpto"). */ if (P_HAS_FULLXID(opaque)) + safexid = XidFromFullTransactionId(BTPageGetDeleteXid(page)); + else { - FullTransactionId safexid = BTPageGetDeleteXid(page); - - elog(DEBUG2, "deleted page from block %u has safexid %llu", - blkno, (unsigned long long) U64FromFullTransactionId(safexid)); + safexid = BTP_GET_XACT(opaque); + stat->btpo_prev = 0; + stat->btpo_level = 0; } - else - elog(DEBUG2, "deleted page from block %u has safexid %u", - blkno, opaque->btpo_level); + + elog(DEBUG2, "deleted page from block %u has safexid %llu", + blkno, (unsigned long long) safexid); /* Don't interpret BTDeletedPageData as index tuples */ maxoff = InvalidOffsetNumber; @@ -145,9 +151,7 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) stat->type = 'i'; /* btpage opaque data */ - stat->btpo_prev = opaque->btpo_prev; stat->btpo_next = opaque->btpo_next; - stat->btpo_level = opaque->btpo_level; stat->btpo_flags = opaque->btpo_flags; stat->btpo_cycleid = opaque->btpo_cycleid; diff --git a/contrib/pageinspect/expected/btree.out b/contrib/pageinspect/expected/btree.out index 035a81a7592..5fb91224660 100644 --- a/contrib/pageinspect/expected/btree.out +++ b/contrib/pageinspect/expected/btree.out @@ -94,8 +94,8 @@ SELECT bt_page_items('aaa'::bytea); ERROR: invalid page size -- invalid special area size CREATE INDEX test1_a_brin ON test1 USING brin(a); -SELECT bt_page_items(get_raw_page('test1', 0)); -ERROR: input page is not a valid btree page +-- XXX: false positive in 64xids due to equal sizes of BTPageOpaque and HeapPageSpecialData +-- SELECT bt_page_items(get_raw_page('test1', 0)); SELECT bt_page_items(get_raw_page('test1_a_brin', 0)); ERROR: input page is not a valid btree page \set VERBOSITY default diff --git 
a/contrib/pageinspect/expected/hash_1.out b/contrib/pageinspect/expected/hash_1.out new file mode 100644 index 00000000000..5e64eb92602 --- /dev/null +++ b/contrib/pageinspect/expected/hash_1.out @@ -0,0 +1,166 @@ +CREATE TABLE test_hash (a int, b text); +INSERT INTO test_hash VALUES (1, 'one'); +CREATE INDEX test_hash_a_idx ON test_hash USING hash (a); +\x +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 0)); +-[ RECORD 1 ]--+--------- +hash_page_type | metapage + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 1)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 2)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 3)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 4)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 5)); +-[ RECORD 1 ]--+------- +hash_page_type | bitmap + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 6)); +ERROR: block number 6 is out of range for relation "test_hash_a_idx" +SELECT * FROM hash_bitmap_info('test_hash_a_idx', -1); +ERROR: invalid block number +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 0); +ERROR: invalid overflow block number 0 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 1); +ERROR: invalid overflow block number 1 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 2); +ERROR: invalid overflow block number 2 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 3); +ERROR: invalid overflow block number 3 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 4); +ERROR: invalid overflow block number 4 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 5); +ERROR: invalid overflow block number 5 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 6); +ERROR: block number 6 is out of range for relation "test_hash_a_idx" +SELECT magic, version, ntuples, bsize, bmsize, 
bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 0)); +-[ RECORD 1 ]----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +magic | 105121344 +version | 4 +ntuples | 1 +bsize | 8156 +bmsize | 4096 +bmshift | 15 +maxbucket | 3 +highmask | 7 +lowmask | 3 +ovflpoint | 2 +firstfree | 0 +nmaps | 1 +procid | 450 +spares | {0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} +mapp | {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} + +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 1)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 2)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 3)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 4)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 5)); +ERROR: page is not a hash meta page +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM 
+hash_page_stats(get_raw_page('test_hash_a_idx', 0)); +ERROR: page is not a hash bucket or overflow page +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 1)); +-[ RECORD 1 ]---+----------- +live_items | 0 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 0 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 2)); +-[ RECORD 1 ]---+----------- +live_items | 0 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 1 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 3)); +-[ RECORD 1 ]---+----------- +live_items | 1 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 2 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 4)); +-[ RECORD 1 ]---+----------- +live_items | 0 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 3 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 5)); +ERROR: page is not a hash bucket or overflow page +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 0)); +ERROR: page is not a hash bucket or overflow page +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 1)); +(0 rows) + 
+SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 2)); +(0 rows) + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 3)); +-[ RECORD 1 ]---------- +itemoffset | 1 +ctid | (0,1) +data | 2389907270 + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 4)); +(0 rows) + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 5)); +ERROR: page is not a hash bucket or overflow page +DROP TABLE test_hash; diff --git a/contrib/pageinspect/expected/oldextversions.out b/contrib/pageinspect/expected/oldextversions.out index f5c4b61bd79..00323d392d6 100644 --- a/contrib/pageinspect/expected/oldextversions.out +++ b/contrib/pageinspect/expected/oldextversions.out @@ -40,16 +40,16 @@ SELECT * FROM bt_page_items('test1_a_idx', 1); -- pagesize in pageinspect >= 1.10. ALTER EXTENSION pageinspect UPDATE TO '1.9'; \df page_header - List of functions - Schema | Name | Result data type | Argument data types | Type ---------+-------------+------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------ - public | page_header | record | page bytea, OUT lsn pg_lsn, OUT checksum smallint, OUT flags smallint, OUT lower smallint, OUT upper smallint, OUT special smallint, OUT pagesize smallint, OUT version smallint, OUT prune_xid xid | func + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-------------+------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------ + public | page_header | record | page bytea, OUT lsn pg_lsn, OUT checksum smallint, OUT flags smallint, OUT lower smallint, OUT upper smallint, OUT special smallint, OUT pagesize smallint, OUT 
version smallint, OUT xid_base xid, OUT multi_base xid, OUT prune_xid xid | func (1 row) SELECT pagesize, version FROM page_header(get_raw_page('test1', 0)); pagesize | version ----------+--------- - 8192 | 4 + 8192 | 5 (1 row) DROP TABLE test1; diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out index 3bdc37bbf59..5ca00378df7 100644 --- a/contrib/pageinspect/expected/page.out +++ b/contrib/pageinspect/expected/page.out @@ -48,7 +48,7 @@ SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0); SELECT pagesize, version FROM page_header(get_raw_page('test1', 0)); pagesize | version ----------+--------- - 8192 | 4 + 8192 | 5 (1 row) SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_test; @@ -69,19 +69,19 @@ SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bi SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0)); fsm_page_contents ------------------- - 0: 254 + - 1: 254 + - 3: 254 + - 7: 254 + - 15: 254 + - 31: 254 + - 63: 254 + - 127: 254 + - 255: 254 + - 511: 254 + - 1023: 254 + - 2047: 254 + - 4095: 254 + + 0: 253 + + 1: 253 + + 3: 253 + + 7: 253 + + 15: 253 + + 31: 253 + + 63: 253 + + 127: 253 + + 255: 253 + + 511: 253 + + 1023: 253 + + 2047: 253 + + 4095: 253 + fp_next_slot: 0 + (1 row) diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c index 3dd1a9bc2ab..a0939e63044 100644 --- a/contrib/pageinspect/heapfuncs.c +++ b/contrib/pageinspect/heapfuncs.c @@ -163,7 +163,7 @@ heap_page_items(PG_FUNCTION_ARGS) inter_call_data->tupd = tupdesc; inter_call_data->offset = FirstOffsetNumber; - inter_call_data->page = VARDATA(raw_page); + inter_call_data->page = get_page_from_raw(raw_page); fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page); fctx->user_fctx = inter_call_data; @@ -211,6 +211,7 @@ heap_page_items(PG_FUNCTION_ARGS) lp_offset == MAXALIGN(lp_offset) && lp_offset + lp_len <= raw_page_size) { + HeapTupleData 
tup; HeapTupleHeader tuphdr; bytea *tuple_data_bytea; int tuple_data_len; @@ -218,9 +219,11 @@ heap_page_items(PG_FUNCTION_ARGS) /* Extract information from the tuple header */ tuphdr = (HeapTupleHeader) PageGetItem(page, id); + tup.t_data = tuphdr; + HeapTupleCopyBaseFromPage(&tup, page); - values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr)); - values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr)); + values[4] = TransactionIdGetDatum(HeapTupleGetXmin(&tup)); + values[5] = TransactionIdGetDatum(HeapTupleGetRawXmax(&tup)); /* shared with xvac */ values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr)); values[7] = PointerGetDatum(&tuphdr->t_ctid); diff --git a/contrib/pageinspect/pageinspect--1.10--1.11.sql b/contrib/pageinspect/pageinspect--1.10--1.11.sql new file mode 100644 index 00000000000..236f18aa2f8 --- /dev/null +++ b/contrib/pageinspect/pageinspect--1.10--1.11.sql @@ -0,0 +1,145 @@ +/* contrib/pageinspect/pageinspect--1.10--1.11.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.11'" to load this file. 
\quit + +-- +-- gist_page_opaque_info() +-- +DROP FUNCTION gist_page_opaque_info(bytea); +CREATE FUNCTION gist_page_opaque_info(IN page bytea, + OUT lsn pg_lsn, + OUT nsn pg_lsn, + OUT rightlink bigint, + OUT flags text[]) +AS 'MODULE_PATHNAME', 'gist_page_opaque_info' +LANGUAGE C STRICT PARALLEL SAFE; + + +-- +-- gist_page_items_bytea() +-- +DROP FUNCTION gist_page_items_bytea(bytea); +CREATE FUNCTION gist_page_items_bytea(IN page bytea, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT dead boolean, + OUT key_data bytea) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gist_page_items_bytea' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- gist_page_items() +-- +DROP FUNCTION gist_page_items(bytea, regclass); +CREATE FUNCTION gist_page_items(IN page bytea, + IN index_oid regclass, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT dead boolean, + OUT keys text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gist_page_items' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- get_raw_page() +-- +DROP FUNCTION get_raw_page(text, int8); +DROP FUNCTION IF EXISTS get_raw_page(text, int4); +CREATE FUNCTION get_raw_page(text, int8) +RETURNS bytea +AS 'MODULE_PATHNAME', 'get_raw_page_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +DROP FUNCTION get_raw_page(text, text, int8); +DROP FUNCTION IF EXISTS get_raw_page(text, text, int4); +CREATE FUNCTION get_raw_page(text, text, int8) +RETURNS bytea +AS 'MODULE_PATHNAME', 'get_raw_page_fork_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- page_checksum() +-- +DROP FUNCTION page_checksum(IN page bytea, IN blkno int8); +DROP FUNCTION IF EXISTS page_checksum(IN page bytea, IN blkno int4); +CREATE FUNCTION page_checksum(IN page bytea, IN blkno int8) +RETURNS smallint +AS 'MODULE_PATHNAME', 'page_checksum_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- bt_metap() +-- +DROP FUNCTION bt_metap(text); +CREATE FUNCTION bt_metap(IN relname text, + OUT magic int4, + OUT version int4, + OUT root int8, + OUT level 
int8, + OUT fastroot int8, + OUT fastlevel int8, + OUT last_cleanup_num_delpages int8, + OUT last_cleanup_num_tuples float8, + OUT allequalimage boolean) +AS 'MODULE_PATHNAME', 'bt_metap' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- bt_page_stats() +-- +DROP FUNCTION bt_page_stats(text, int8); +DROP FUNCTION IF EXISTS bt_page_stats(text, int4); +CREATE FUNCTION bt_page_stats(IN relname text, IN blkno int8, + OUT blkno int8, + OUT type "char", + OUT live_items int4, + OUT dead_items int4, + OUT avg_item_size int4, + OUT page_size int4, + OUT free_size int4, + OUT btpo_prev int8, + OUT btpo_next int8, + OUT btpo_level int8, + OUT btpo_flags int4) +AS 'MODULE_PATHNAME', 'bt_page_stats_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- bt_page_items() +-- +DROP FUNCTION bt_page_items(text, int8); +DROP FUNCTION IF EXISTS bt_page_items(text, int4); +CREATE FUNCTION bt_page_items(IN relname text, IN blkno int8, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT nulls bool, + OUT vars bool, + OUT data text, + OUT dead boolean, + OUT htid tid, + OUT tids tid[]) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'bt_page_items_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- brin_page_items() +-- +DROP FUNCTION brin_page_items(IN page bytea, IN index_oid regclass); +CREATE FUNCTION brin_page_items(IN page bytea, IN index_oid regclass, + OUT itemoffset int, + OUT blknum int8, + OUT attnum int, + OUT allnulls bool, + OUT hasnulls bool, + OUT placeholder bool, + OUT value text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'brin_page_items' +LANGUAGE C STRICT PARALLEL SAFE; diff --git a/contrib/pageinspect/pageinspect--1.5.sql b/contrib/pageinspect/pageinspect--1.5.sql index 1e40c3c97e2..fdbd2995a22 100644 --- a/contrib/pageinspect/pageinspect--1.5.sql +++ b/contrib/pageinspect/pageinspect--1.5.sql @@ -28,6 +28,8 @@ CREATE FUNCTION page_header(IN page bytea, OUT special smallint, OUT pagesize smallint, OUT version smallint, + OUT xid_base xid, + OUT multi_base xid, 
OUT prune_xid xid) AS 'MODULE_PATHNAME', 'page_header' LANGUAGE C STRICT PARALLEL SAFE; diff --git a/contrib/pageinspect/pageinspect.control b/contrib/pageinspect/pageinspect.control index 7cdf37913da..f277413dd8c 100644 --- a/contrib/pageinspect/pageinspect.control +++ b/contrib/pageinspect/pageinspect.control @@ -1,5 +1,5 @@ # pageinspect extension comment = 'inspect the contents of database pages at a low level' -default_version = '1.10' +default_version = '1.11' module_pathname = '$libdir/pageinspect' relocatable = true diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c index 730a46b1d84..fc3560e795d 100644 --- a/contrib/pageinspect/rawpage.c +++ b/contrib/pageinspect/rawpage.c @@ -17,6 +17,7 @@ #include "access/htup_details.h" #include "access/relation.h" +#include "commands/sequence.h" #include "catalog/namespace.h" #include "catalog/pg_type.h" #include "funcapi.h" @@ -251,8 +252,8 @@ page_header(PG_FUNCTION_ARGS) Datum result; HeapTuple tuple; - Datum values[9]; - bool nulls[9]; + Datum values[11]; + bool nulls[11]; PageHeader page; XLogRecPtr lsn; @@ -312,12 +313,30 @@ page_header(PG_FUNCTION_ARGS) } values[7] = UInt16GetDatum(PageGetPageLayoutVersion(page)); - values[8] = TransactionIdGetDatum(page->pd_prune_xid); /* Build and return the tuple. 
*/ memset(nulls, 0, sizeof(nulls)); + if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HeapPageSpecialData))) + { + HeapPageSpecial pageSpecial = HeapPageGetSpecial(page); + + values[8] = TransactionIdGetDatum(pageSpecial->pd_xid_base); + values[9] = TransactionIdGetDatum(pageSpecial->pd_multi_base); + values[10] = TransactionIdGetDatum(HeapPageGetPruneXidNoAssert((Page) page)); + nulls[8] = false; + nulls[9] = false; + nulls[10] = false; + } + else + { + nulls[8] = false; + values[8] = TransactionIdGetDatum(HeapPageGetPruneXidNoAssert((Page) page)); + nulls[9] = true; + nulls[10] = true; + } + tuple = heap_form_tuple(tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); diff --git a/contrib/pageinspect/sql/btree.sql b/contrib/pageinspect/sql/btree.sql index 1f554f0f678..538d71d23a1 100644 --- a/contrib/pageinspect/sql/btree.sql +++ b/contrib/pageinspect/sql/btree.sql @@ -40,7 +40,8 @@ SELECT bt_page_items(get_raw_page('test1_b_gist', 0)); SELECT bt_page_items('aaa'::bytea); -- invalid special area size CREATE INDEX test1_a_brin ON test1 USING brin(a); -SELECT bt_page_items(get_raw_page('test1', 0)); +-- XXX: false positive in 64xids due to equal sizes of BTPageOpaque and HeapPageSpecialData +-- SELECT bt_page_items(get_raw_page('test1', 0)); SELECT bt_page_items(get_raw_page('test1_a_brin', 0)); \set VERBOSITY default diff --git a/contrib/pg_surgery/heap_surgery.c b/contrib/pg_surgery/heap_surgery.c index 3e641aa6440..3554f075db3 100644 --- a/contrib/pg_surgery/heap_surgery.c +++ b/contrib/pg_surgery/heap_surgery.c @@ -271,11 +271,17 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt) else { HeapTupleHeader htup; + HeapTupleData tuple; Assert(heap_force_opt == HEAP_FORCE_FREEZE); htup = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_data = htup; + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyBaseFromPage(&tuple, page); + /* * Reset all visibility-related fields of the 
tuple. This * logic should mimic heap_execute_freeze_tuple(), but we @@ -283,8 +289,10 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt) * potentially-garbled data is left behind. */ ItemPointerSet(&htup->t_ctid, blkno, curoff); - HeapTupleHeaderSetXmin(htup, FrozenTransactionId); - HeapTupleHeaderSetXmax(htup, InvalidTransactionId); + HeapTupleSetXmin(&tuple, FrozenTransactionId); + HeapTupleHeaderSetXmin(page, &tuple); + HeapTupleSetXmax(&tuple, InvalidTransactionId); + HeapTupleHeaderSetXmax(page, &tuple); if (htup->t_infomask & HEAP_MOVED) { if (htup->t_infomask & HEAP_MOVED_OFF) diff --git a/contrib/pg_visibility/expected/pg_visibility.out b/contrib/pg_visibility/expected/pg_visibility.out index 9de54db2a29..d3c893b4e3c 100644 --- a/contrib/pg_visibility/expected/pg_visibility.out +++ b/contrib/pg_visibility/expected/pg_visibility.out @@ -267,6 +267,22 @@ select * from pg_check_frozen('copyfreeze'); -------- (0 rows) +create table vacuum_test as select 42 i; +vacuum (disable_page_skipping) vacuum_test; +-- pg_check_visible() can report false positive due to autovacuum activity. +-- To workaround this issue, repeat the call. +do $$ +declare + non_visible_count bigint; + i integer; +begin + for i in 1 .. 
10 loop + if i > 1 then perform pg_sleep(1); end if; + select count(*) from pg_check_visible('vacuum_test') into non_visible_count; + if non_visible_count = 0 then exit; end if; + end loop; + if non_visible_count > 0 then raise exception 'The visibility map is corrupt.'; end if; +end $$; -- cleanup drop table test_partitioned; drop view test_view; @@ -277,3 +293,4 @@ drop foreign data wrapper dummy; drop materialized view matview_visibility_test; drop table regular_table; drop table copyfreeze; +drop table vacuum_test; diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index 1853c354e3d..4bc80869c02 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -657,6 +657,7 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = relid; + HeapTupleCopyBaseFromPage(&tuple, page); /* * If we're checking whether the page is all-visible, we expect @@ -700,7 +701,7 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) */ if (check_frozen) { - if (heap_tuple_needs_eventual_freeze(tuple.t_data)) + if (heap_tuple_needs_eventual_freeze(&tuple)) record_corrupt_item(items, &tuple.t_self); } } @@ -763,7 +764,7 @@ tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer) * be set here. So just check the xmin. 
*/ - xmin = HeapTupleHeaderGetXmin(tup->t_data); + xmin = HeapTupleGetXmin(tup); if (!TransactionIdPrecedes(xmin, OldestXmin)) return false; /* xmin not old enough for all to see */ diff --git a/contrib/pg_visibility/sql/pg_visibility.sql b/contrib/pg_visibility/sql/pg_visibility.sql index ff3538f9964..a0d9525df9b 100644 --- a/contrib/pg_visibility/sql/pg_visibility.sql +++ b/contrib/pg_visibility/sql/pg_visibility.sql @@ -170,6 +170,23 @@ commit; select * from pg_visibility_map('copyfreeze'); select * from pg_check_frozen('copyfreeze'); +create table vacuum_test as select 42 i; +vacuum (disable_page_skipping) vacuum_test; +-- pg_check_visible() can report false positive due to autovacuum activity. +-- To workaround this issue, repeat the call. +do $$ +declare + non_visible_count bigint; + i integer; +begin + for i in 1 .. 10 loop + if i > 1 then perform pg_sleep(1); end if; + select count(*) from pg_check_visible('vacuum_test') into non_visible_count; + if non_visible_count = 0 then exit; end if; + end loop; + if non_visible_count > 0 then raise exception 'The visibility map is corrupt.'; end if; +end $$; + -- cleanup drop table test_partitioned; drop view test_view; @@ -180,3 +197,4 @@ drop foreign data wrapper dummy; drop materialized view matview_visibility_test; drop table regular_table; drop table copyfreeze; +drop table vacuum_test; diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c index ef89b84ec31..0abf1ea21a4 100644 --- a/contrib/pgrowlocks/pgrowlocks.c +++ b/contrib/pgrowlocks/pgrowlocks.c @@ -130,7 +130,7 @@ pgrowlocks(PG_FUNCTION_ARGS) htsu = HeapTupleSatisfiesUpdate(tuple, GetCurrentCommandId(false), hscan->rs_cbuf); - xmax = HeapTupleHeaderGetRawXmax(tuple->t_data); + xmax = HeapTupleGetRawXmax(tuple); infomask = tuple->t_data->t_infomask; /* diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c index 15ddc322392..6d9e481013d 100644 --- a/contrib/pgstattuple/pgstatapprox.c +++ 
b/contrib/pgstattuple/pgstatapprox.c @@ -153,6 +153,7 @@ statapprox_heap(Relation rel, output_type *stat) tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyBaseFromPage(&tuple, page); /* * We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c index e1048e47ff3..aff4a0bf2bb 100644 --- a/contrib/pgstattuple/pgstatindex.c +++ b/contrib/pgstattuple/pgstatindex.c @@ -605,7 +605,7 @@ pgstathashindex(PG_FUNCTION_ARGS) metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); metap = HashPageGetMeta(BufferGetPage(metabuf)); stats.version = metap->hashm_version; - stats.space_per_page = metap->hashm_bsize; + stats.space_per_page = BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(HashPageOpaqueData)); _hash_relbuf(rel, metabuf); /* Get the current relation length */ diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 44457f930c2..27a8b42f763 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -4467,16 +4467,24 @@ UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; EXPLAIN (verbose, costs off) -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; -- can be pushed down - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------- - Update on public.ft2 - Output: c1, c2, c3, c4, c5, c6, c7, c8 - -> Foreign Update on public.ft2 - Remote SQL: UPDATE "S 1"."T 1" SET c2 = (c2 + 400), c3 = (c3 || '_update7') WHERE ((("C 1" % 10) = 7)) RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8 -(4 rows) +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, 
c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; -- can be pushed down + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Sort + Output: t.c1, t.c2, t.c3, t.c4, t.c5, t.c6, t.c7, t.c8 + Sort Key: t.c1 + CTE t + -> Update on public.ft2 + Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8 + -> Foreign Update on public.ft2 + Remote SQL: UPDATE "S 1"."T 1" SET c2 = (c2 + 400), c3 = (c3 || '_update7') WHERE ((("C 1" % 10) = 7)) RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8 + -> CTE Scan on t + Output: t.c1, t.c2, t.c3, t.c4, t.c5, t.c6, t.c7, t.c8 +(10 rows) -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 ------+-----+--------------------+------------------------------+--------------------------+----+------------+----- 7 | 407 | 00007_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo @@ -4596,16 +4604,24 @@ UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; EXPLAIN (verbose, costs off) - DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; -- can be pushed down - QUERY PLAN --------------------------------------------------------------------------------------------- - Delete on public.ft2 - Output: c1, c4 - -> Foreign Delete on public.ft2 - Remote SQL: DELETE FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 5)) RETURNING "C 1", c4 -(4 rows) + WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) + SELECT * FROM t ORDER BY c1; -- can be pushed down + QUERY PLAN 
+---------------------------------------------------------------------------------------------------- + Sort + Output: t.c1, t.c4 + Sort Key: t.c1 + CTE t + -> Delete on public.ft2 + Output: ft2.c1, ft2.c4 + -> Foreign Delete on public.ft2 + Remote SQL: DELETE FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 5)) RETURNING "C 1", c4 + -> CTE Scan on t + Output: t.c1, t.c4 +(10 rows) -DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; +WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) +SELECT * FROM t ORDER BY c1; c1 | c4 ------+------------------------------ 5 | Tue Jan 06 00:00:00 1970 PST @@ -5866,7 +5882,8 @@ INSERT INTO ft2 (c1,c2,c3,c6) VALUES (1218, 818, 'ggg', '(--;') RETURNING *; 1218 | 818 | ggg_trig_update | | | (--; | ft2 | (1 row) -UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *) +SELECT * FROM t ORDER BY c1; c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 ------+-----+------------------------+------------------------------+--------------------------+----+------------+----- 8 | 608 | 00008_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index d56951153bb..2e5748d9d52 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -4809,8 +4809,8 @@ apply_returning_filter(PgFdwDirectModifyState *dmstate, * Note: no need to care about tableoid here because it will be * initialized in ExecProcessReturning(). 
*/ - HeapTupleHeaderSetXmin(resultTup->t_data, InvalidTransactionId); - HeapTupleHeaderSetXmax(resultTup->t_data, InvalidTransactionId); + HeapTupleSetXmin(resultTup, InvalidTransactionId); + HeapTupleSetXmax(resultTup, InvalidTransactionId); HeapTupleHeaderSetCmin(resultTup->t_data, InvalidTransactionId); } @@ -7277,6 +7277,7 @@ make_tuple_from_result_row(PGresult *res, */ if (ctid) tuple->t_self = tuple->t_data->t_ctid = *ctid; + HeapTupleSetZeroBase(tuple); /* * Stomp on the xmin, xmax, and cmin fields from the tuple created by @@ -7286,8 +7287,8 @@ make_tuple_from_result_row(PGresult *res, * assumption. If we don't do this then, for example, the tuple length * ends up in the xmin field, which isn't what we want. */ - HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); - HeapTupleHeaderSetXmin(tuple->t_data, InvalidTransactionId); + HeapTupleSetXmax(tuple, InvalidTransactionId); + HeapTupleSetXmin(tuple, InvalidTransactionId); HeapTupleHeaderSetCmin(tuple->t_data, InvalidTransactionId); /* Clean up */ diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 92d12120272..2288a9c9255 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -1251,16 +1251,20 @@ EXPLAIN (verbose, costs off) UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; -- can be pushed down UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; EXPLAIN (verbose, costs off) -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; -- can be pushed down -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; -- can be pushed down +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; EXPLAIN (verbose, costs off) 
UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; -- can be pushed down UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; EXPLAIN (verbose, costs off) - DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; -- can be pushed down -DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; + WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) + SELECT * FROM t ORDER BY c1; -- can be pushed down +WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) +SELECT * FROM t ORDER BY c1; EXPLAIN (verbose, costs off) DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2; -- can be pushed down DELETE FROM ft2 USING ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 2; @@ -1367,7 +1371,8 @@ CREATE TRIGGER t1_br_insert BEFORE INSERT OR UPDATE INSERT INTO ft2 (c1,c2,c3) VALUES (1208, 818, 'fff') RETURNING *; INSERT INTO ft2 (c1,c2,c3,c6) VALUES (1218, 818, 'ggg', '(--;') RETURNING *; -UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *) +SELECT * FROM t ORDER BY c1; -- Test errors thrown on remote side during update ALTER TABLE "S 1"."T 1" ADD CONSTRAINT c2positive CHECK (c2 >= 0); diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index 503cda46eff..4e9a8849035 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -640,10 +640,10 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull) result = PointerGetDatum(&(tup->t_self)); break; case MinTransactionIdAttributeNumber: - result = TransactionIdGetDatum(HeapTupleHeaderGetRawXmin(tup->t_data)); + result = TransactionIdGetDatum(HeapTupleGetRawXmin(tup)); break; case MaxTransactionIdAttributeNumber: - result = 
TransactionIdGetDatum(HeapTupleHeaderGetRawXmax(tup->t_data)); + result = TransactionIdGetDatum(HeapTupleGetRawXmax(tup)); break; case MinCommandIdAttributeNumber: case MaxCommandIdAttributeNumber: @@ -688,6 +688,7 @@ heap_copytuple(HeapTuple tuple) newTuple->t_len = tuple->t_len; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyBase(newTuple, tuple); newTuple->t_data = (HeapTupleHeader) ((char *) newTuple + HEAPTUPLESIZE); memcpy((char *) newTuple->t_data, (char *) tuple->t_data, tuple->t_len); return newTuple; @@ -714,6 +715,7 @@ heap_copytuple_with_tuple(HeapTuple src, HeapTuple dest) dest->t_len = src->t_len; dest->t_self = src->t_self; dest->t_tableOid = src->t_tableOid; + HeapTupleCopyBase(dest, src); dest->t_data = (HeapTupleHeader) palloc(src->t_len); memcpy((char *) dest->t_data, (char *) src->t_data, src->t_len); } @@ -1161,6 +1163,7 @@ heap_modify_tuple(HeapTuple tuple, newTuple->t_data->t_ctid = tuple->t_data->t_ctid; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyBase(newTuple, tuple); return newTuple; } @@ -1224,6 +1227,7 @@ heap_modify_tuple_by_cols(HeapTuple tuple, newTuple->t_data->t_ctid = tuple->t_data->t_ctid; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyBase(newTuple, tuple); return newTuple; } diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index e3b3d5a6e27..916355c3ab0 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -260,58 +260,6 @@ static relopt_int intRelOpts[] = }, -1, 1, 10000 }, - { - { - "autovacuum_freeze_min_age", - "Minimum age at which VACUUM should freeze a table row, for autovacuum", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, - -1, 0, 1000000000 - }, - { - { - "autovacuum_multixact_freeze_min_age", - "Minimum multixact age at which VACUUM should freeze a row multixact's, for 
autovacuum", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, - -1, 0, 1000000000 - }, - { - { - "autovacuum_freeze_max_age", - "Age at which to autovacuum a table to prevent transaction ID wraparound", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, - -1, 100000, 2000000000 - }, - { - { - "autovacuum_multixact_freeze_max_age", - "Multixact age at which to autovacuum a table to prevent multixact wraparound", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, - -1, 10000, 2000000000 - }, - { - { - "autovacuum_freeze_table_age", - "Age at which VACUUM should perform a full table sweep to freeze row versions", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, -1, 0, 2000000000 - }, - { - { - "autovacuum_multixact_freeze_table_age", - "Age of multixact at which VACUUM should perform a full table sweep to freeze row versions", - RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, - ShareUpdateExclusiveLock - }, -1, 0, 2000000000 - }, { { "log_autovacuum_min_duration", @@ -388,6 +336,60 @@ static relopt_int intRelOpts[] = static relopt_int64 int64RelOpts[] = { + { + { + "autovacuum_freeze_min_age", + "Minimum age at which VACUUM should freeze a table row, for autovacuum", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(0), INT64CONST(1000000000) + }, + { + { + "autovacuum_multixact_freeze_min_age", + "Minimum multixact age at which VACUUM should freeze a row multixact's, for autovacuum", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(0), INT64CONST(1000000000) + }, + { + { + "autovacuum_freeze_max_age", + "Age at which to autovacuum a table to prevent transaction ID wraparound", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(100000), INT64CONST(2000000000) + }, + { + { + "autovacuum_multixact_freeze_max_age", + "Multixact age at which to 
autovacuum a table to prevent multixact wraparound", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(10000), INT64CONST(2000000000) + }, + { + { + "autovacuum_freeze_table_age", + "Age at which VACUUM should perform a full table sweep to freeze row versions", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(0), INT64CONST(2000000000) + }, + { + { + "autovacuum_multixact_freeze_table_age", + "Age of multixact at which VACUUM should perform a full table sweep to freeze row versions", + RELOPT_KIND_HEAP | RELOPT_KIND_TOAST, + ShareUpdateExclusiveLock + }, + INT64CONST(-1), INT64CONST(0), INT64CONST(2000000000) + }, /* list terminator */ {{NULL}} }; @@ -1921,17 +1923,17 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind) offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, analyze_threshold)}, {"autovacuum_vacuum_cost_limit", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, vacuum_cost_limit)}, - {"autovacuum_freeze_min_age", RELOPT_TYPE_INT, + {"autovacuum_freeze_min_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_min_age)}, - {"autovacuum_freeze_max_age", RELOPT_TYPE_INT, + {"autovacuum_freeze_max_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_max_age)}, - {"autovacuum_freeze_table_age", RELOPT_TYPE_INT, + {"autovacuum_freeze_table_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, freeze_table_age)}, - {"autovacuum_multixact_freeze_min_age", RELOPT_TYPE_INT, + {"autovacuum_multixact_freeze_min_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_min_age)}, - {"autovacuum_multixact_freeze_max_age", RELOPT_TYPE_INT, + {"autovacuum_multixact_freeze_max_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, 
multixact_freeze_max_age)}, - {"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT, + {"autovacuum_multixact_freeze_table_age", RELOPT_TYPE_INT64, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, multixact_freeze_table_age)}, {"log_autovacuum_min_duration", RELOPT_TYPE_INT, offsetof(StdRdOptions, autovacuum) + offsetof(AutoVacOpts, log_min_duration)}, diff --git a/src/backend/access/hash/hashvalidate.c b/src/backend/access/hash/hashvalidate.c index 10bf26ce7c0..83e033b93cc 100644 --- a/src/backend/access/hash/hashvalidate.c +++ b/src/backend/access/hash/hashvalidate.c @@ -317,11 +317,10 @@ check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype) * INTERNAL and allowing any such function seems too scary. */ if ((funcid == F_HASHINT4 || funcid == F_HASHINT4EXTENDED) && - (argtype == DATEOID || - argtype == XIDOID || argtype == CIDOID)) + (argtype == DATEOID || argtype == CIDOID)) /* okay, allowed use of hashint4() */ ; else if ((funcid == F_HASHINT8 || funcid == F_HASHINT8EXTENDED) && - (argtype == XID8OID)) + (argtype == XID8OID || argtype == XIDOID)) /* okay, allowed use of hashint8() */ ; else if ((funcid == F_TIMESTAMP_HASH || funcid == F_TIMESTAMP_HASH_EXTENDED) && diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 863b0e560c9..0d3fa87ac07 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -52,10 +52,14 @@ #include "access/xloginsert.h" #include "access/xlogutils.h" #include "catalog/catalog.h" +#include "catalog/index.h" +#include "catalog/namespace.h" +#include "commands/vacuum.h" #include "miscadmin.h" #include "pgstat.h" #include "port/atomics.h" #include "port/pg_bitutils.h" +#include "storage/buf_internals.h" #include "storage/bufmgr.h" #include "storage/freespace.h" #include "storage/lmgr.h" @@ -73,7 +77,7 @@ static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, - TransactionId xid, CommandId cid, int options); + CommandId cid, int options); 
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, @@ -460,6 +464,7 @@ heapgetpage(TableScanDesc sscan, BlockNumber page) loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd); loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); loctup.t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(&loctup, dp); ItemPointerSet(&(loctup.t_self), page, lineoff); if (all_visible) @@ -676,6 +681,7 @@ heapgettup(HeapScanDesc scan, tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); tuple->t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(tuple, dp); return; } @@ -702,6 +708,7 @@ heapgettup(HeapScanDesc scan, tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); tuple->t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(tuple, dp); ItemPointerSet(&(tuple->t_self), page, lineoff); /* @@ -1001,6 +1008,7 @@ heapgettup_pagemode(HeapScanDesc scan, tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); tuple->t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(tuple, dp); /* check that rs_cindex is in sync */ Assert(scan->rs_cindex < scan->rs_ntuples); @@ -1023,6 +1031,7 @@ heapgettup_pagemode(HeapScanDesc scan, tuple->t_data = (HeapTupleHeader) PageGetItem((Page) dp, lpp); tuple->t_len = ItemIdGetLength(lpp); + HeapTupleCopyBaseFromPage(tuple, dp); ItemPointerSet(&(tuple->t_self), page, lineoff); /* @@ -1614,6 +1623,7 @@ heap_fetch(Relation relation, tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(tuple, page); /* * check tuple visibility, then release lock @@ -1622,7 +1632,7 @@ heap_fetch(Relation relation, if (valid) PredicateLockTID(relation, &(tuple->t_self), snapshot, - HeapTupleHeaderGetXmin(tuple->t_data)); + HeapTupleGetXmin(tuple)); HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, 
snapshot); @@ -1699,6 +1709,8 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Assert(TransactionIdIsValid(RecentXmin)); Assert(BufferGetBlockNumber(buffer) == blkno); + heapTuple->t_self = *tid; + /* Scan through possible multiple members of HOT-chain */ for (;;) { @@ -1734,6 +1746,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, heapTuple->t_data = (HeapTupleHeader) PageGetItem(dp, lp); heapTuple->t_len = ItemIdGetLength(lp); heapTuple->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(heapTuple, dp); ItemPointerSet(&heapTuple->t_self, blkno, offnum); /* @@ -1748,7 +1761,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, */ if (TransactionIdIsValid(prev_xmax) && !TransactionIdEquals(prev_xmax, - HeapTupleHeaderGetXmin(heapTuple->t_data))) + HeapTupleGetXmin(heapTuple))) break; /* @@ -1769,7 +1782,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, { ItemPointerSetOffsetNumber(tid, offnum); PredicateLockTID(relation, &heapTuple->t_self, snapshot, - HeapTupleHeaderGetXmin(heapTuple->t_data)); + HeapTupleGetXmin(heapTuple)); if (all_dead) *all_dead = false; return true; @@ -1804,7 +1817,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, blkno); offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid); at_chain_start = false; - prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data); + prev_xmax = HeapTupleGetUpdateXidAny(heapTuple); } else break; /* end of chain */ @@ -1891,13 +1904,14 @@ heap_get_latest_tid(TableScanDesc sscan, tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(&tp, page); /* * After following a t_ctid link, we might arrive at an unrelated * tuple. Check for XMIN match. 
*/ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data))) + !TransactionIdEquals(priorXmax, HeapTupleGetXmin(&tp))) { UnlockReleaseBuffer(buffer); break; @@ -1916,7 +1930,7 @@ heap_get_latest_tid(TableScanDesc sscan, * If there's a valid t_ctid link, follow it, else we're done. */ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) || - HeapTupleHeaderIsOnlyLocked(tp.t_data) || + HeapTupleIsOnlyLocked(&tp) || HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) || ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid)) { @@ -1925,7 +1939,7 @@ heap_get_latest_tid(TableScanDesc sscan, } ctid = tp.t_data->t_ctid; - priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data); + priorXmax = HeapTupleGetUpdateXidAny(&tp); UnlockReleaseBuffer(buffer); } /* end of loop */ } @@ -1950,7 +1964,7 @@ heap_get_latest_tid(TableScanDesc sscan, static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid) { - Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple), xid)); + Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(BufferGetPage(buffer), tuple), xid)); Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI)); if (!(tuple->t_infomask & (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID))) @@ -2042,7 +2056,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, * Note: below this point, heaptup is the data we actually intend to store * into the relation; tup is the caller's original untoasted data. */ - heaptup = heap_prepare_insert(relation, tup, xid, cid, options); + heaptup = heap_prepare_insert(relation, tup, cid, options); /* * Find buffer to insert this tuple into. 
If the page is all visible, @@ -2069,6 +2083,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, */ CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber); + heap_page_prepare_for_xid(relation, buffer, xid, false); + HeapTupleSetXmin(heaptup, xid); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -2150,6 +2167,10 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, } XLogBeginInsert(); + + if (info & XLOG_HEAP_INIT_PAGE) + XLogRegisterData((char *) &HeapPageGetSpecial(page)->pd_xid_base, sizeof(TransactionId)); + XLogRegisterData((char *) &xlrec, SizeOfHeapInsert); xlhdr.t_infomask2 = heaptup->t_data->t_infomask2; @@ -2204,6 +2225,486 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, } } +static void +xid_min_max(ShortTransactionId *min, ShortTransactionId *max, + ShortTransactionId xid, + bool *found) +{ + Assert(TransactionIdIsNormal(xid)); + Assert(xid <= MaxShortTransactionId); + + if (!*found) + { + *min = *max = xid; + *found = true; + } + else + { + *min = Min(*min, xid); + *max = Max(*max, xid); + } +} + +/* + * Find minimum and maximum short transaction ids which occurs in the page. 
+ */ +static bool +heap_page_xid_min_max(Page page, bool multi, + ShortTransactionId *min, ShortTransactionId *max) +{ + bool found; + OffsetNumber offnum, + maxoff; + ItemId itemid; + HeapTupleHeader htup; + + maxoff = PageGetMaxOffsetNumber(page); + found = false; + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + if (!multi) + { + if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmin) && + !HeapTupleHeaderXminFrozen(htup)) + { + xid_min_max(min, max, htup->t_choice.t_heap.t_xmin, &found); + } + + if (htup->t_infomask & HEAP_XMAX_INVALID) + continue; + + if ((htup->t_infomask & HEAP_XMAX_IS_MULTI) && + (!(htup->t_infomask & HEAP_XMAX_LOCK_ONLY))) + { + TransactionId update_xid; + ShortTransactionId xid; + + update_xid = MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(page, htup), + htup->t_infomask); + xid = NormalTransactionIdToShort(HeapPageGetSpecial(page)->pd_xid_base, + update_xid); + + xid_min_max(min, max, xid, &found); + } + } + + if (!TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax)) + continue; + + if (multi != (bool) (htup->t_infomask & HEAP_XMAX_IS_MULTI)) + continue; + + xid_min_max(min, max, htup->t_choice.t_heap.t_xmax, &found); + } + + Assert(!found || (*min > InvalidTransactionId && *max <= MaxShortTransactionId)); + + return found; +} + +/* + * Shift xid base in the page. WAL-logged if buffer is specified. 
+ */ +static void +heap_page_shift_base(Relation relation, Buffer buffer, Page page, + bool multi, int64 delta) +{ + HeapPageSpecial pageSpecial; + OffsetNumber offnum, + maxoff; + ItemId itemid; + HeapTupleHeader htup; + + START_CRIT_SECTION(); + pageSpecial = HeapPageGetSpecial(page); + + /* Iterate over page items */ + maxoff = PageGetMaxOffsetNumber(page); + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + /* Apply xid shift to heap tuple */ + if (!multi) + { + /* shift xmin */ + if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmin) && + !HeapTupleHeaderXminFrozen(htup)) + { + Assert(htup->t_choice.t_heap.t_xmin - delta >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmin - delta <= MaxShortTransactionId); + htup->t_choice.t_heap.t_xmin -= delta; + } + } + + /* shift xmax */ + if (!TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax)) + continue; + + if (multi != (bool) (htup->t_infomask & HEAP_XMAX_IS_MULTI)) + continue; + + Assert(htup->t_choice.t_heap.t_xmax - delta >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmax - delta <= MaxShortTransactionId); + htup->t_choice.t_heap.t_xmax -= delta; + } + + /* Apply xid shift to base as well */ + if (!multi) + pageSpecial->pd_xid_base += delta; + else + pageSpecial->pd_multi_base += delta; + + if (BufferIsValid(buffer)) + MarkBufferDirty(buffer); + + /* Write WAL record if needed */ + if (relation && RelationNeedsWAL(relation)) + { + XLogRecPtr recptr; + xl_heap_base_shift xlrec; + + xlrec.delta = delta; + xlrec.multi = multi; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHeapBaseShift); + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + + recptr = XLogInsert(RM_HEAP3_ID, XLOG_HEAP3_BASE_SHIFT); + + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); +} + +/* + * Freeze xids 
in the single heap page. Useful when we can't fit new xid even + * with base shift. + */ +static void +freeze_single_heap_page(Relation relation, Buffer buffer) +{ + Page page = BufferGetPage(buffer); + OffsetNumber offnum, + maxoff; + HeapTupleData tuple; + int nfrozen = 0; + xl_heap_freeze_tuple *frozen; + TransactionId OldestXmin, + FreezeXid; + MultiXactId OldestMxact, + MultiXactCutoff; + GlobalVisState *vistest; + ItemId itemid; + bool tuple_totally_frozen; + int ndeleted, + nnewlpdead; + + vacuum_set_xid_limits(relation, 0, 0, 0, 0, &OldestMxact, + &OldestXmin, &FreezeXid, &MultiXactCutoff); + + vistest = GlobalVisTestFor(relation); + + ndeleted = heap_page_prune(relation, buffer, vistest, InvalidTransactionId, 0, + &nnewlpdead, &offnum, false); + if (ndeleted > nnewlpdead) + pgstat_update_heap_dead_tuples(relation, + ndeleted - nnewlpdead); + + /* + * Now scan the page to collect vacuumable items and check for tuples + * requiring freezing. + */ + maxoff = PageGetMaxOffsetNumber(page); + frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage); + + /* + * Note: If you change anything in the loop below, also look at + * heap_page_is_all_visible to see if that needs to be changed. + */ + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + TransactionId NewRelfrozenXid; + MultiXactId NewRelminMxid; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(&tuple, page); + + /* + * Each non-removable tuple must be checked to see if it needs + * freezing. Note we already have exclusive buffer lock. 
+ */ + if (heap_prepare_freeze_tuple(&tuple, + relation->rd_rel->relfrozenxid, + relation->rd_rel->relminmxid, + FreezeXid, MultiXactCutoff, + &frozen[nfrozen], &tuple_totally_frozen, + &NewRelfrozenXid, &NewRelminMxid)) + frozen[nfrozen++].offset = offnum; + } + + /* + * If we froze any tuples, mark the buffer dirty, and write a WAL record + * recording the changes. We must log the changes to be crash-safe + * against future truncation of CLOG. + */ + if (nfrozen > 0) + { + int i; + ItemId itemid; + HeapTupleHeader htup; + + START_CRIT_SECTION(); + + MarkBufferDirty(buffer); + + /* execute collected freezes */ + for (i = 0; i < nfrozen; i++) + { + itemid = PageGetItemId(page, frozen[i].offset); + htup = (HeapTupleHeader) PageGetItem(page, itemid); + heap_execute_freeze_tuple_page(page, htup, &frozen[i]); + } + + /* Now WAL-log freezing if necessary */ + if (RelationNeedsWAL(relation)) + { + XLogRecPtr recptr; + + recptr = log_heap_freeze(relation, buffer, FreezeXid, + frozen, nfrozen); + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + } + + pfree(frozen); + + return; +} + +static void +heap_page_apply_delta(Relation relation, Buffer buffer, Page page, + TransactionId xid, bool multi, + TransactionId base, int64 delta) +{ + Assert(xid >= base + delta + FirstNormalTransactionId); + Assert(xid <= base + delta + MaxShortTransactionId); + + heap_page_shift_base(relation, buffer, page, multi, delta); + +#ifdef USE_ASSERT_CHECKING + base = multi ? 
+ HeapPageGetSpecial(page)->pd_multi_base : + HeapPageGetSpecial(page)->pd_xid_base; + Assert(xid >= base + FirstNormalTransactionId); + Assert(xid <= base + MaxShortTransactionId); +#endif /* USE_ASSERT_CHECKING */ +} + +static void +heap_page_check_delta(Relation relation, Buffer buffer, + TransactionId xid, TransactionId base, + ShortTransactionId min, ShortTransactionId max, + int64 delta, int64 *freeDelta, int64 *requiredDelta) +{ + BufferDesc *buf; + char *path; + BackendId backend; + + Assert((freeDelta == NULL) == (requiredDelta == NULL)); + + if (xid >= base + delta + FirstNormalTransactionId && + xid <= base + delta + MaxShortTransactionId) + { + return; + } + + if (buffer == InvalidBuffer) + return; + + if (BufferIsLocal(buffer)) + { + buf = GetLocalBufferDescriptor(-buffer - 1); + backend = MyBackendId; + } + else + { + buf = GetBufferDescriptor(buffer - 1); + backend = InvalidBackendId; + } + + path = relpathbackend(buf->tag.rnode, backend, buf->tag.forkNum); + + if (freeDelta == NULL) + elog(FATAL, "Fatal xid base calculation error: xid = %llu, base = %llu, min = %u, max = %u, delta = %lld (rel=%s, blockNum=%u)", + (unsigned long long) xid, (unsigned long long) base, + min, max, + (long long) delta, + path, buf->tag.blockNum); + else + elog(FATAL, "Fatal xid base calculation error: xid = %llu, base = %llu, min = %u, max = %u, freeDelta = %lld, requiredDelta = %lld, delta = %lld (rel=%s, blockNum=%u)", + (unsigned long long) xid, (unsigned long long) base, + min, max, + (long long) *freeDelta, (long long) *requiredDelta, + (long long) delta, + path, buf->tag.blockNum); +} + +static int +heap_page_try_prepare_for_xid(Relation relation, Buffer buffer, Page page, + TransactionId xid, bool multi) +{ + HeapPageSpecial pageSpecial = HeapPageGetSpecial(page); + TransactionId base; + ShortTransactionId min = InvalidTransactionId, + max = InvalidTransactionId; + int64 delta, + freeDelta, + requiredDelta; + + base = multi ? 
pageSpecial->pd_multi_base : pageSpecial->pd_xid_base; + /* If xid fits the page no action needed. */ + if (xid >= base + FirstNormalTransactionId && + xid <= base + MaxShortTransactionId) + { + return 0; + } + + /* No items on the page? */ + if (!heap_page_xid_min_max(page, multi, &min, &max)) + { + delta = (int64) (xid - FirstNormalTransactionId) - (int64) base; + heap_page_check_delta(relation, buffer, xid, base, min, max, delta, + NULL, NULL); + heap_page_apply_delta(relation, buffer, page, xid, multi, base, delta); + return 0; + } + + /* Can we just shift base on the page? */ + if (xid < base + FirstNormalTransactionId) + { + freeDelta = MaxShortTransactionId - max; + requiredDelta = (base + FirstNormalTransactionId) - xid; + /* Shouldn't consider setting base less than 0 */ + freeDelta = Min(freeDelta, base); + + if (requiredDelta > freeDelta) + return -1; + + delta = -(freeDelta + requiredDelta) / 2; + } + else + { + freeDelta = min - FirstNormalTransactionId; + requiredDelta = xid - (base + MaxShortTransactionId); + + if (requiredDelta > freeDelta) + return -1; + + delta = (freeDelta + requiredDelta) / 2; + } + + heap_page_check_delta(relation, buffer, xid, base, min, max, + delta, &freeDelta, &requiredDelta); + heap_page_apply_delta(relation, buffer, page, xid, multi, base, delta); + + return 1; +} + +/* + * Ensure that given xid fits base of given page. + */ +bool +heap_page_prepare_for_xid(Relation relation, Buffer buffer, + TransactionId xid, bool multi) +{ + Page page = BufferGetPage(buffer); + int res; + + /* "Double xmax" page format doesn't require any preparation */ + if (HeapPageIsDoubleXmax(page)) + return false; + + if (!TransactionIdIsNormal(xid)) + return false; + + res = heap_page_try_prepare_for_xid(relation, buffer, page, xid, multi); + if (res != -1) + return res == 1; + + /* Have to try freeing the page... 
*/ + freeze_single_heap_page(relation, buffer); + + res = heap_page_try_prepare_for_xid(relation, buffer, page, xid, multi); + if (res != -1) + return res == 1; + + elog(ERROR, "could not fit xid into page"); + + return false; +} + +/* + * Ensure that given xid fits base of given page. + */ +void +rewrite_page_prepare_for_xid(Page page, HeapTuple tup) +{ + TransactionId xid; + int res; + + /* xmin */ + xid = HeapTupleGetXmin(tup); + if (TransactionIdIsNormal(xid)) + { + res = heap_page_try_prepare_for_xid(NULL, InvalidBuffer, page, xid, + false); + if (res == -1) + elog(ERROR, "could not fit xid into page"); + } + + /* xmax */ + xid = HeapTupleGetRawXmax(tup); + if (TransactionIdIsNormal(xid)) + { + res = heap_page_try_prepare_for_xid(NULL, InvalidBuffer, page, xid, + tup->t_data->t_infomask & HEAP_XMAX_IS_MULTI); + if (res == -1) + elog(ERROR, "could not fit xid into page"); + } +} + + /* * Subroutine for heap_insert(). Prepares a tuple for insertion. This sets the * tuple header fields and toasts the tuple if necessary. Returns a toasted @@ -2211,7 +2712,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, * that in any case, the header fields are also set in the original tuple. 
*/ static HeapTuple -heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, +heap_prepare_insert(Relation relation, HeapTuple tup, CommandId cid, int options) { /* @@ -2228,12 +2729,12 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, tup->t_data->t_infomask &= ~(HEAP_XACT_MASK); tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK); tup->t_data->t_infomask |= HEAP_XMAX_INVALID; - HeapTupleHeaderSetXmin(tup->t_data, xid); + HeapTupleSetXmin(tup, InvalidTransactionId); if (options & HEAP_INSERT_FROZEN) HeapTupleHeaderSetXminFrozen(tup->t_data); HeapTupleHeaderSetCmin(tup->t_data, cid); - HeapTupleHeaderSetXmax(tup->t_data, 0); /* for cleanliness */ + HeapTupleSetXmax(tup, 0); /* for cleanliness */ tup->t_tableOid = RelationGetRelid(relation); /* @@ -2296,8 +2797,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL); slots[i]->tts_tableOid = RelationGetRelid(relation); tuple->t_tableOid = slots[i]->tts_tableOid; - heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid, - options); + heaptuples[i] = heap_prepare_insert(relation, tuple, cid, options); } /* @@ -2353,6 +2853,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN)) all_frozen_set = true; + heap_page_prepare_for_xid(relation, buffer, xid, false); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -2360,6 +2862,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, * RelationGetBufferForTuple has ensured that the first tuple fits. * Put that on the page, and then as many other tuples as fit. 
*/ + HeapTupleSetXmin(heaptuples[ndone], xid); RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false); /* @@ -2376,6 +2879,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace) break; + HeapTupleSetXmin(heaptup, xid); RelationPutHeapTuple(relation, buffer, heaptup, false); /* @@ -2511,6 +3015,10 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, bufflags |= REGBUF_KEEP_DATA; XLogBeginInsert(); + + if (info & XLOG_HEAP_INIT_PAGE) + XLogRegisterData((char *) &HeapPageGetSpecial(page)->pd_xid_base, sizeof(TransactionId)); + XLogRegisterData((char *) xlrec, tupledata - scratch.data); XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags); @@ -2731,6 +3239,7 @@ heap_delete(Relation relation, ItemPointer tid, tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_self = *tid; + HeapTupleCopyBaseFromPage(&tp, page); l1: result = HeapTupleSatisfiesUpdate(&tp, cid, buffer); @@ -2748,7 +3257,7 @@ l1: uint16 infomask; /* must copy state data before unlocking buffer */ - xwait = HeapTupleHeaderGetRawXmax(tp.t_data); + xwait = HeapTupleGetRawXmax(&tp); infomask = tp.t_data->t_infomask; /* @@ -2787,13 +3296,16 @@ l1: NULL); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + /* Copy possibly updated xid base after relocking */ + HeapTupleCopyBaseFromPage(&tp, page); + /* * If xwait had just locked the tuple then some other xact * could update this tuple before we get to this point. Check * for xmax change, and start over if so. 
*/ if (xmax_infomask_changed(tp.t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(&tp), xwait)) goto l1; } @@ -2820,13 +3332,16 @@ l1: XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + /* Copy possibly updated xid base after relocking */ + HeapTupleCopyBaseFromPage(&tp, page); + /* * xwait is done, but if xwait had just locked the tuple then some * other xact could update this tuple before we get to this point. * Check for xmax change, and start over if so. */ if (xmax_infomask_changed(tp.t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(&tp), xwait)) goto l1; @@ -2840,7 +3355,7 @@ l1: */ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) || HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask) || - HeapTupleHeaderIsOnlyLocked(tp.t_data)) + HeapTupleIsOnlyLocked(&tp)) result = TM_Ok; else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid)) result = TM_Updated; @@ -2865,9 +3380,9 @@ l1: Assert(result != TM_Updated || !ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid)); tmfd->ctid = tp.t_data->t_ctid; - tmfd->xmax = HeapTupleHeaderGetUpdateXid(tp.t_data); + tmfd->xmax = HeapTupleGetUpdateXidAny(&tp); if (result == TM_SelfModified) - tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data); + tmfd->cmax = HeapTupleGetCmax(&tp); else tmfd->cmax = InvalidCommandId; UnlockReleaseBuffer(buffer); @@ -2890,7 +3405,7 @@ l1: CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer)); /* replace cid with a combo CID if necessary */ - HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo); + HeapTupleHeaderAdjustCmax(&tp, &cid, &iscombo); /* * Compute replica identity tuple before entering the critical section so @@ -2908,11 +3423,15 @@ l1: */ MultiXactIdSetOldestMember(); - compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(tp.t_data), + 
compute_new_xmax_infomask(HeapTupleGetRawXmax(&tp), tp.t_data->t_infomask, tp.t_data->t_infomask2, xid, LockTupleExclusive, true, &new_xmax, &new_infomask, &new_infomask2); + heap_page_prepare_for_xid(relation, buffer, new_xmax, + (new_infomask & HEAP_XMAX_IS_MULTI) ? true : false); + HeapTupleCopyBaseFromPage(&tp, page); + START_CRIT_SECTION(); /* @@ -2938,10 +3457,12 @@ l1: tp.t_data->t_infomask |= new_infomask; tp.t_data->t_infomask2 |= new_infomask2; HeapTupleHeaderClearHotUpdated(tp.t_data); - HeapTupleHeaderSetXmax(tp.t_data, new_xmax); + HeapTupleSetXmax(&tp, new_xmax); + HeapTupleHeaderSetXmax(page, &tp); HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo); /* Make sure there is no forward chain link in t_ctid */ tp.t_data->t_ctid = tp.t_self; + HeapTupleCopyBaseFromPage(&tp, page); /* Signal that this is actually a move into another partition */ if (changingPart) @@ -3132,7 +3653,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, HeapTuple heaptup; HeapTuple old_key_tuple = NULL; bool old_key_copied = false; - Page page; + Page page, + newpage; BlockNumber block; MultiXactStatus mxact_status; Buffer buffer, @@ -3225,6 +3747,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp); oldtup.t_len = ItemIdGetLength(lp); oldtup.t_self = *otid; + HeapTupleCopyBaseFromPage(&oldtup, page); /* the new tuple is ready, except for this: */ newtup->t_tableOid = RelationGetRelid(relation); @@ -3318,7 +3841,7 @@ l2: */ /* must copy state data before unlocking buffer */ - xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data); + xwait = HeapTupleGetRawXmax(&oldtup); infomask = oldtup.t_data->t_infomask; /* @@ -3369,6 +3892,7 @@ l2: checked_lockers = true; locker_remains = remain != 0; LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(&oldtup, page); /* * If xwait had just locked the tuple then some other xact @@ -3377,7 +3901,7 @@ l2: */ if 
(xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(&oldtup), xwait)) goto l2; } @@ -3403,7 +3927,7 @@ l2: * subxact aborts. */ if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask)) - update_xact = HeapTupleGetUpdateXid(oldtup.t_data); + update_xact = HeapTupleGetUpdateXid(&oldtup); else update_xact = InvalidTransactionId; @@ -3451,6 +3975,8 @@ l2: checked_lockers = true; LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(&oldtup, page); + /* * xwait is done, but if xwait had just locked the tuple then some * other xact could update this tuple before we get to this point. @@ -3458,7 +3984,7 @@ l2: */ if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) || !TransactionIdEquals(xwait, - HeapTupleHeaderGetRawXmax(oldtup.t_data))) + HeapTupleGetRawXmax(&oldtup))) goto l2; /* Otherwise check if it committed or aborted */ @@ -3495,9 +4021,9 @@ l2: Assert(result != TM_Updated || !ItemPointerEquals(&oldtup.t_self, &oldtup.t_data->t_ctid)); tmfd->ctid = oldtup.t_data->t_ctid; - tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data); + tmfd->xmax = HeapTupleGetUpdateXidAny(&oldtup); if (result == TM_SelfModified) - tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data); + tmfd->cmax = HeapTupleGetCmax(&oldtup); else tmfd->cmax = InvalidCommandId; UnlockReleaseBuffer(buffer); @@ -3527,6 +4053,7 @@ l2: LockBuffer(buffer, BUFFER_LOCK_UNLOCK); visibilitymap_pin(relation, block, &vmbuffer); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(&oldtup, page); goto l2; } @@ -3536,7 +4063,7 @@ l2: * If the tuple we're updating is locked, we need to preserve the locking * info in the old tuple's Xmax. Prepare a new Xmax value for this. 
*/ - compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data), + compute_new_xmax_infomask(HeapTupleGetRawXmax(&oldtup), oldtup.t_data->t_infomask, oldtup.t_data->t_infomask2, xid, *lockmode, true, @@ -3555,7 +4082,7 @@ l2: (checked_lockers && !locker_remains)) xmax_new_tuple = InvalidTransactionId; else - xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data); + xmax_new_tuple = HeapTupleGetRawXmax(&oldtup); if (!TransactionIdIsValid(xmax_new_tuple)) { @@ -3588,17 +4115,15 @@ l2: */ newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK); newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK); - HeapTupleHeaderSetXmin(newtup->t_data, xid); HeapTupleHeaderSetCmin(newtup->t_data, cid); newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple; newtup->t_data->t_infomask2 |= infomask2_new_tuple; - HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple); /* * Replace cid with a combo CID if necessary. Note that we already put * the plain cid into the new tuple. */ - HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo); + HeapTupleHeaderAdjustCmax(&oldtup, &cid, &iscombo); /* * If the toaster needs to be activated, OR if the new tuple will not fit @@ -3628,7 +4153,7 @@ l2: newtupsize = MAXALIGN(newtup->t_len); - if (need_toast || newtupsize > pagefree) + if (need_toast || newtupsize > pagefree || HeapPageIsDoubleXmax(page)) { TransactionId xmax_lock_old_tuple; uint16 infomask_lock_old_tuple, @@ -3653,7 +4178,7 @@ l2: * updating, because the potentially created multixact would otherwise * be wrong. */ - compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data), + compute_new_xmax_infomask(HeapTupleGetRawXmax(&oldtup), oldtup.t_data->t_infomask, oldtup.t_data->t_infomask2, xid, *lockmode, false, @@ -3662,6 +4187,10 @@ l2: Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple)); + heap_page_prepare_for_xid(relation, buffer, xmax_lock_old_tuple, + (infomask_lock_old_tuple & HEAP_XMAX_IS_MULTI) ? 
true : false); + HeapTupleCopyBaseFromPage(&oldtup, page); + START_CRIT_SECTION(); /* Clear obsolete visibility flags ... */ @@ -3670,10 +4199,12 @@ HeapTupleClearHotUpdated(&oldtup); /* ... and store info about transaction updating this tuple */ Assert(TransactionIdIsValid(xmax_lock_old_tuple)); - HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple); oldtup.t_data->t_infomask |= infomask_lock_old_tuple; oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple; + HeapTupleSetXmax(&oldtup, xmax_lock_old_tuple); + HeapTupleHeaderSetXmax(page, &oldtup); HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); + HeapTupleCopyBaseFromPage(&oldtup, page); /* temporarily make it look not-updated, but locked */ oldtup.t_data->t_ctid = oldtup.t_self; @@ -3755,7 +4286,11 @@ */ for (;;) { - if (newtupsize > pagefree) + /* + * We can't fit new tuple to "double xmax" page, since it's + * impossible to set xmin there. + */ + if (newtupsize > pagefree || HeapPageIsDoubleXmax(page)) { /* It doesn't fit, must use RelationGetBufferForTuple. */ newbuf = RelationGetBufferForTuple(relation, heaptup->t_len, @@ -3788,6 +4323,9 @@ break; } } + + /* Copy possibly updated xid base to old tuple after relocking */ + HeapTupleCopyBaseFromPage(&oldtup, page); } else { @@ -3847,6 +4385,33 @@ id_has_external, &old_key_copied); + newpage = BufferGetPage(newbuf); + + /* + * Prepare pages for the current xid, that is written to the new tuple's Xmax + * and old page's pd_prune_xid. + */ + heap_page_prepare_for_xid(relation, buffer, xid, false); + if (newbuf != buffer) + heap_page_prepare_for_xid(relation, newbuf, xid, false); + + /* Prepare pages for tuple's Xmax */ + heap_page_prepare_for_xid(relation, buffer, xmax_old_tuple, + (infomask_old_tuple & HEAP_XMAX_IS_MULTI) ? true : false); + heap_page_prepare_for_xid(relation, newbuf, xmax_new_tuple, + (heaptup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ? true : false); + + /* Copy possibly updated Xid bases to both tuples. 
*/ + HeapTupleCopyBaseFromPage(&oldtup, page); + + /* + * Set new tuple's Xmin/Xmax, old tuple's Xmin/Xmax were already shifted. + */ + HeapTupleSetXmin(heaptup, xid); + HeapTupleHeaderSetXmin(newpage, heaptup); + HeapTupleSetXmax(heaptup, xmax_new_tuple); + HeapTupleHeaderSetXmax(newpage, heaptup); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -3889,10 +4454,12 @@ l2: oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; /* ... and store info about transaction updating this tuple */ Assert(TransactionIdIsValid(xmax_old_tuple)); - HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple); oldtup.t_data->t_infomask |= infomask_old_tuple; oldtup.t_data->t_infomask2 |= infomask2_old_tuple; + HeapTupleSetXmax(&oldtup, xmax_old_tuple); + HeapTupleHeaderSetXmax(page, &oldtup); HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); + HeapTupleCopyBaseFromPage(&oldtup, page); /* record address of new tuple in t_ctid of old one */ oldtup.t_data->t_ctid = heaptup->t_self; @@ -3946,6 +4513,20 @@ l2: END_CRIT_SECTION(); + if (newtup != heaptup) + { + /* + * Set new tuple's Xmin/Xmax only after both xid base preparations. + * Old tuple's Xmin/Xmax were already shifted because old tuple is on + * the page. 
+ */ + HeapTupleCopyBase(newtup, heaptup); + HeapTupleSetXmin(newtup, xid); + HeapTupleHeaderSetXmin(newpage, newtup); + HeapTupleSetXmax(newtup, xmax_new_tuple); + HeapTupleHeaderSetXmax(newpage, newtup); + } + if (newbuf != buffer) LockBuffer(newbuf, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); @@ -4284,6 +4865,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyBaseFromPage(tuple, page); l3: result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer); @@ -4310,7 +4892,7 @@ l3: ItemPointerData t_ctid; /* must copy state data before unlocking buffer */ - xwait = HeapTupleHeaderGetRawXmax(tuple->t_data); + xwait = HeapTupleGetRawXmax(tuple); infomask = tuple->t_data->t_infomask; infomask2 = tuple->t_data->t_infomask2; ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid); @@ -4468,11 +5050,13 @@ l3: result = res; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); goto failed; } } LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); /* * Make sure it's still an appropriate lock, else start over. @@ -4481,7 +5065,7 @@ l3: * now need to follow the update chain to lock the new * versions. */ - if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) && + if (!HeapTupleIsOnlyLocked(tuple) && ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) || !updated)) goto l3; @@ -4508,6 +5092,7 @@ l3: !HEAP_XMAX_IS_EXCL_LOCKED(infomask)) { LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); /* * Make sure it's still an appropriate lock, else start over. @@ -4536,8 +5121,10 @@ l3: * meantime, start over. 
*/ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); + if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; @@ -4548,10 +5135,11 @@ l3: else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)) { LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); /* if the xmax changed in the meantime, start over */ if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; /* otherwise, we're good */ @@ -4576,8 +5164,10 @@ l3: { /* ... but if the xmax changed in the meantime, start over */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); + if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; Assert(HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask)); @@ -4598,6 +5188,7 @@ l3: if (require_sleep && (result == TM_Updated || result == TM_Deleted)) { LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); goto failed; } else if (require_sleep) @@ -4623,6 +5214,7 @@ l3: result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); goto failed; } @@ -4649,6 +5241,7 @@ l3: result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); goto failed; } break; @@ -4689,6 +5282,7 @@ l3: result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); 
goto failed; } break; @@ -4715,11 +5309,13 @@ l3: result = res; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); goto failed; } } LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); /* * xwait is done, but if xwait had just locked the tuple then some @@ -4727,7 +5323,7 @@ l3: * Check for xmax change, and start over if so. */ if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; @@ -4755,7 +5351,7 @@ l3: if (!require_sleep || (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) || HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) || - HeapTupleHeaderIsOnlyLocked(tuple->t_data)) + HeapTupleIsOnlyLocked(tuple)) result = TM_Ok; else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid)) result = TM_Updated; @@ -4781,9 +5377,9 @@ failed: Assert(result != TM_Updated || !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid)); tmfd->ctid = tuple->t_data->t_ctid; - tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data); + tmfd->xmax = HeapTupleGetUpdateXidAny(tuple); if (result == TM_SelfModified) - tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data); + tmfd->cmax = HeapTupleGetCmax(tuple); else tmfd->cmax = InvalidCommandId; goto out_locked; @@ -4803,10 +5399,11 @@ failed: LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); visibilitymap_pin(relation, block, &vmbuffer); LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyBaseFromPage(tuple, page); goto l3; } - xmax = HeapTupleHeaderGetRawXmax(tuple->t_data); + xmax = HeapTupleGetRawXmax(tuple); old_infomask = tuple->t_data->t_infomask; /* @@ -4828,6 +5425,10 @@ failed: GetCurrentTransactionId(), mode, false, &xid, &new_infomask, &new_infomask2); + heap_page_prepare_for_xid(relation, *buffer, xid, + (new_infomask & HEAP_XMAX_IS_MULTI) ? 
true : false); + HeapTupleCopyBaseFromPage(tuple, page); + START_CRIT_SECTION(); /* @@ -4846,7 +5447,8 @@ failed: tuple->t_data->t_infomask2 |= new_infomask2; if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask)) HeapTupleHeaderClearHotUpdated(tuple->t_data); - HeapTupleHeaderSetXmax(tuple->t_data, xid); + HeapTupleSetXmax(tuple, xid); + HeapTupleHeaderSetXmax(page, tuple); /* * Make sure there is no forward chain link in t_ctid. Note that in the @@ -5440,12 +6042,18 @@ l4: LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); } + /* + * Copy xid base after buffer relocking, it could have changed since + * heap_fetch(). + */ + HeapTupleCopyBaseFromPage(&mytup, BufferGetPage(buf)); + /* * Check the tuple XMIN against prior XMAX, if any. If we reached the * end of the chain, we're done, so return success. */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(HeapTupleHeaderGetXmin(mytup.t_data), + !TransactionIdEquals(HeapTupleGetXmin(&mytup), priorXmax)) { result = TM_Ok; @@ -5457,7 +6065,7 @@ l4: * (sub)transaction, then we already locked the last live one in the * chain, thus we're done, so return success. */ - if (TransactionIdDidAbort(HeapTupleHeaderGetXmin(mytup.t_data))) + if (TransactionIdDidAbort(HeapTupleGetXmin(&mytup))) { result = TM_Ok; goto out_locked; @@ -5465,7 +6073,7 @@ l4: old_infomask = mytup.t_data->t_infomask; old_infomask2 = mytup.t_data->t_infomask2; - xmax = HeapTupleHeaderGetRawXmax(mytup.t_data); + xmax = HeapTupleGetRawXmax(&mytup); /* * If this tuple version has been updated or locked by some concurrent @@ -5478,7 +6086,7 @@ l4: TransactionId rawxmax; bool needwait; - rawxmax = HeapTupleHeaderGetRawXmax(mytup.t_data); + rawxmax = HeapTupleGetRawXmax(&mytup); if (old_infomask & HEAP_XMAX_IS_MULTI) { int nmembers; @@ -5619,14 +6227,19 @@ l4: VISIBILITYMAP_ALL_FROZEN)) cleared_all_frozen = true; + heap_page_prepare_for_xid(rel, buf, new_xmax, + (new_infomask & HEAP_XMAX_IS_MULTI) ? 
true : false); + HeapTupleCopyBaseFromPage(&mytup, BufferGetPage(buf)); + START_CRIT_SECTION(); /* ... and set them */ - HeapTupleHeaderSetXmax(mytup.t_data, new_xmax); mytup.t_data->t_infomask &= ~HEAP_XMAX_BITS; mytup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; mytup.t_data->t_infomask |= new_infomask; mytup.t_data->t_infomask2 |= new_infomask2; + HeapTupleSetXmax(&mytup, new_xmax); + HeapTupleHeaderSetXmax(BufferGetPage(buf), &mytup); MarkBufferDirty(buf); @@ -5660,14 +6273,14 @@ next: if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID || HeapTupleHeaderIndicatesMovedPartitions(mytup.t_data) || ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) || - HeapTupleHeaderIsOnlyLocked(mytup.t_data)) + HeapTupleIsOnlyLocked(&mytup)) { result = TM_Ok; goto out_locked; } /* tail recursion */ - priorXmax = HeapTupleHeaderGetUpdateXid(mytup.t_data); + priorXmax = HeapTupleGetUpdateXidAny(&mytup); ItemPointerCopy(&(mytup.t_data->t_ctid), &tupid); UnlockReleaseBuffer(buf); } @@ -5874,12 +6487,13 @@ heap_abort_speculative(Relation relation, ItemPointer tid) tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_self = *tid; + HeapTupleCopyBaseFromPage(&tp, page); /* * Sanity check that the tuple really is a speculatively inserted tuple, * inserted by us. 
*/ - if (tp.t_data->t_choice.t_heap.t_xmin != xid) + if (HeapTupleGetRawXmin(&tp) != xid) elog(ERROR, "attempted to kill a tuple inserted by another transaction"); if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data))) elog(ERROR, "attempted to kill a non-speculative tuple"); @@ -5908,6 +6522,8 @@ heap_abort_speculative(Relation relation, ItemPointer tid) prune_xid = relation->rd_rel->relfrozenxid; else prune_xid = TransactionXmin; + Assert(TransactionIdIsValid(prune_xid)); + heap_page_prepare_for_xid(relation, buffer, prune_xid, false); PageSetPrunable(page, prune_xid); /* store transaction information of xact deleting the tuple */ @@ -5917,9 +6533,12 @@ heap_abort_speculative(Relation relation, ItemPointer tid) /* * Set the tuple header xmin to InvalidTransactionId. This makes the * tuple immediately invisible everyone. (In particular, to any - * transactions waiting on the speculative token, woken up later.) + * transactions waiting on the speculative token, woken up later.) Don't + * need to reload xid base from page because InvalidTransactionId doesn't + * require xid base to be valid. */ - HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId); + HeapTupleSetXmin(&tp, InvalidTransactionId); + HeapTupleHeaderSetXmin(page, &tp); /* Clear the speculative insertion token too */ tp.t_data->t_ctid = tp.t_self; @@ -6267,7 +6886,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * individual members might even show that we don't need to keep anything. */ nnewmembers = 0; - newmembers = palloc(sizeof(MultiXactMember) * nmembers); + newmembers = palloc0(sizeof(MultiXactMember) * nmembers); has_lockers = false; update_xid = InvalidTransactionId; update_committed = false; @@ -6467,7 +7086,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * The *frz WAL record we output completely removes all old XIDs during REDO. 
*/ bool -heap_prepare_freeze_tuple(HeapTupleHeader tuple, +heap_prepare_freeze_tuple(HeapTuple htup, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi, xl_heap_freeze_tuple *frz, bool *totally_frozen, @@ -6479,11 +7098,12 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, bool xmin_frozen; bool freeze_xmax; TransactionId xid; + HeapTupleHeader tuple = htup->t_data; frz->frzflags = 0; frz->t_infomask2 = tuple->t_infomask2; frz->t_infomask = tuple->t_infomask; - frz->xmax = HeapTupleHeaderGetRawXmax(tuple); + frz->xmax = HeapTupleGetRawXmax(htup); /* * Process xmin. xmin_frozen has two slightly different meanings: in the @@ -6495,7 +7115,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * handling, since either way the tuple's xmin will be a permanent value * once we're done with it. */ - xid = HeapTupleHeaderGetXmin(tuple); + xid = HeapTupleGetXmin(htup); if (!TransactionIdIsNormal(xid)) xmin_frozen = true; else @@ -6537,7 +7157,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * * Make sure to keep heap_tuple_would_freeze in sync with this. */ - xid = HeapTupleHeaderGetRawXmax(tuple); + xid = HeapTupleGetRawXmax(htup); if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { @@ -6671,7 +7291,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, } } else if ((tuple->t_infomask & HEAP_XMAX_INVALID) || - !TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple))) + !TransactionIdIsValid(HeapTupleGetRawXmax(htup))) { freeze_xmax = false; xmax_already_frozen = true; @@ -6767,18 +7387,30 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * NB: All code in here must be safe to execute during crash recovery! 
*/ void -heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz) +heap_execute_freeze_tuple(HeapTuple htup, xl_heap_freeze_tuple *frz) { - HeapTupleHeaderSetXmax(tuple, frz->xmax); + HeapTupleHeader tuple = htup->t_data; + + tuple->t_infomask = frz->t_infomask; + tuple->t_infomask2 = frz->t_infomask2; + + HeapTupleSetXmax(htup, frz->xmax); if (frz->frzflags & XLH_FREEZE_XVAC) HeapTupleHeaderSetXvac(tuple, FrozenTransactionId); if (frz->frzflags & XLH_INVALID_XVAC) HeapTupleHeaderSetXvac(tuple, InvalidTransactionId); +} - tuple->t_infomask = frz->t_infomask; - tuple->t_infomask2 = frz->t_infomask2; +void +heap_execute_freeze_tuple_page(Page page, HeapTupleHeader htup, xl_heap_freeze_tuple *frz) +{ + HeapTupleData tuple; + + tuple.t_data = htup; + heap_execute_freeze_tuple(&tuple, frz); + HeapTupleHeaderSetXmax(page, &tuple); } /* @@ -6788,7 +7420,7 @@ heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz) * Useful for callers like CLUSTER that perform their own WAL logging. */ bool -heap_freeze_tuple(HeapTupleHeader tuple, +heap_freeze_tuple(HeapTuple tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi) { @@ -6955,10 +7587,10 @@ MultiXactIdGetUpdateXid(TransactionId xmax, uint16 t_infomask) * checking the hint bits. */ TransactionId -HeapTupleGetUpdateXid(HeapTupleHeader tuple) +HeapTupleGetUpdateXid(HeapTuple tuple) { - return MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(tuple), - tuple->t_infomask); + return MultiXactIdGetUpdateXid(HeapTupleGetRawXmax(tuple), + tuple->t_data->t_infomask); } /* @@ -7184,15 +7816,18 @@ ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, * will eventually require freezing (if tuple isn't removed by pruning first). 
*/ bool -heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) +heap_tuple_needs_eventual_freeze(HeapTuple htup) { TransactionId xid; + HeapTupleHeader tuple; + + tuple = htup->t_data; /* * If xmin is a normal transaction ID, this tuple is definitely not * frozen. */ - xid = HeapTupleHeaderGetXmin(tuple); + xid = HeapTupleGetXmin(htup); if (TransactionIdIsNormal(xid)) return true; @@ -7203,13 +7838,13 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) { MultiXactId multi; - multi = HeapTupleHeaderGetRawXmax(tuple); + multi = HeapTupleGetRawXmax(htup); if (MultiXactIdIsValid(multi)) return true; } else { - xid = HeapTupleHeaderGetRawXmax(tuple); + xid = HeapTupleGetRawXmax(htup); if (TransactionIdIsNormal(xid)) return true; } @@ -7237,7 +7872,7 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) * never freeze here, which makes tracking the oldest extant XID/MXID simple. */ bool -heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, +heap_tuple_would_freeze(HeapTuple htup, TransactionId cutoff_xid, MultiXactId cutoff_multi, TransactionId *relfrozenxid_out, MultiXactId *relminmxid_out) @@ -7245,9 +7880,10 @@ heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, TransactionId xid; MultiXactId multi; bool would_freeze = false; + HeapTupleHeader tuple = htup->t_data; /* First deal with xmin */ - xid = HeapTupleHeaderGetXmin(tuple); + xid = HeapTupleGetXmin(htup); if (TransactionIdIsNormal(xid)) { if (TransactionIdPrecedes(xid, *relfrozenxid_out)) @@ -7260,9 +7896,9 @@ heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, xid = InvalidTransactionId; multi = InvalidMultiXactId; if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) - multi = HeapTupleHeaderGetRawXmax(tuple); + multi = HeapTupleGetRawXmax(htup); else - xid = HeapTupleHeaderGetRawXmax(tuple); + xid = HeapTupleGetRawXmax(htup); if (TransactionIdIsNormal(xid)) { @@ -7335,14 +7971,14 @@ heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId 
cutoff_xid, * with queries. */ void -HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, +HeapTupleHeaderAdvanceLatestRemovedXid(HeapTuple tuple, TransactionId *latestRemovedXid) { - TransactionId xmin = HeapTupleHeaderGetXmin(tuple); - TransactionId xmax = HeapTupleHeaderGetUpdateXid(tuple); - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); + TransactionId xmin = HeapTupleGetXmin(tuple); + TransactionId xmax = HeapTupleGetUpdateXidAny(tuple); + TransactionId xvac = HeapTupleHeaderGetXvac(tuple->t_data); - if (tuple->t_infomask & HEAP_MOVED) + if (tuple->t_data->t_infomask & HEAP_MOVED) { if (TransactionIdPrecedes(*latestRemovedXid, xvac)) *latestRemovedXid = xvac; @@ -7354,8 +7990,8 @@ HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, * * Look for a committed hint bit, or if no xmin bit is set, check clog. */ - if (HeapTupleHeaderXminCommitted(tuple) || - (!HeapTupleHeaderXminInvalid(tuple) && TransactionIdDidCommit(xmin))) + if (HeapTupleHeaderXminCommitted(tuple->t_data) || + (!HeapTupleHeaderXminInvalid(tuple->t_data) && TransactionIdDidCommit(xmin))) { if (xmax != xmin && TransactionIdFollows(xmax, *latestRemovedXid)) @@ -7705,7 +8341,7 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) for (;;) { ItemId lp; - HeapTupleHeader htup; + HeapTupleData htup; /* Sanity check (pure paranoia) */ if (offnum < FirstOffsetNumber) @@ -7742,16 +8378,18 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) if (!ItemIdIsNormal(lp)) break; - htup = (HeapTupleHeader) PageGetItem(page, lp); + htup.t_data = (HeapTupleHeader) PageGetItem(page, lp); + htup.t_len = ItemIdGetLength(lp); + HeapTupleCopyBaseFromPage(&htup, page); /* * Check the tuple XMIN against prior XMAX, if any */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax)) + !TransactionIdEquals(HeapTupleGetXmin(&htup), priorXmax)) break; - HeapTupleHeaderAdvanceLatestRemovedXid(htup, &latestRemovedXid); + 
HeapTupleHeaderAdvanceLatestRemovedXid(&htup, &latestRemovedXid); /* * If the tuple is not HOT-updated, then we are at the end of this @@ -7759,13 +8397,13 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) * chain (they get their own index entries) -- just move on to * next htid from index AM caller. */ - if (!HeapTupleHeaderIsHotUpdated(htup)) + if (!HeapTupleHeaderIsHotUpdated(htup.t_data)) break; /* Advance to next HOT chain member */ - Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno); - offnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + Assert(ItemPointerGetBlockNumber(&htup.t_data->t_ctid) == blkno); + offnum = ItemPointerGetOffsetNumber(&htup.t_data->t_ctid); + priorXmax = HeapTupleGetUpdateXidAny(&htup); } /* Enable further/final shrinking of deltids for caller */ @@ -8345,13 +8983,13 @@ log_heap_update(Relation reln, Buffer oldbuf, /* Prepare WAL data for the old page */ xlrec.old_offnum = ItemPointerGetOffsetNumber(&oldtup->t_self); - xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data); + xlrec.old_xmax = HeapTupleGetRawXmax(oldtup); xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask, oldtup->t_data->t_infomask2); /* Prepare WAL data for the new page */ xlrec.new_offnum = ItemPointerGetOffsetNumber(&newtup->t_self); - xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data); + xlrec.new_xmax = HeapTupleGetRawXmax(newtup); bufflags = REGBUF_STANDARD; if (init) @@ -8363,6 +9001,9 @@ log_heap_update(Relation reln, Buffer oldbuf, if (oldbuf != newbuf) XLogRegisterBuffer(1, oldbuf, REGBUF_STANDARD); + if (info & XLOG_HEAP_INIT_PAGE) + XLogRegisterData((char *) &HeapPageGetSpecial(page)->pd_xid_base, sizeof(TransactionId)); + XLogRegisterData((char *) &xlrec, SizeOfHeapUpdate); /* @@ -8475,8 +9116,8 @@ log_heap_new_cid(Relation relation, HeapTuple tup) { Assert(!(hdr->t_infomask & HEAP_XMAX_INVALID)); Assert(!HeapTupleHeaderXminInvalid(hdr)); - xlrec.cmin 
= HeapTupleHeaderGetCmin(hdr); - xlrec.cmax = HeapTupleHeaderGetCmax(hdr); + xlrec.cmin = HeapTupleGetCmin(tup); + xlrec.cmax = HeapTupleGetCmax(tup); xlrec.combocid = HeapTupleHeaderGetRawCommandId(hdr); } /* No combo CID, so only cmin or cmax can be set by this TX */ @@ -8678,7 +9319,8 @@ heap_xlog_prune(XLogReaderState *record) heap_page_prune_execute(buffer, redirected, nredirected, nowdead, ndead, - nowunused, nunused); + nowunused, nunused, + true); /* * Note: we don't worry about updating the page's prunability hints. @@ -8970,7 +9612,7 @@ heap_xlog_freeze_page(XLogReaderState *record) lp = PageGetItemId(page, xlrec_tp->offset); /* offsets are one-based */ tuple = (HeapTupleHeader) PageGetItem(page, lp); - heap_execute_freeze_tuple(tuple, xlrec_tp); + heap_execute_freeze_tuple_page(page, tuple, xlrec_tp); } PageSetLSN(page, lsn); @@ -9041,6 +9683,8 @@ heap_xlog_delete(XLogReaderState *record) if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { + HeapTupleData tuple; + page = BufferGetPage(buffer); if (PageGetMaxOffsetNumber(page) >= xlrec->offnum) @@ -9056,10 +9700,18 @@ heap_xlog_delete(XLogReaderState *record) HeapTupleHeaderClearHotUpdated(htup); fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, &htup->t_infomask2); + tuple.t_data = htup; + if (!(xlrec->flags & XLH_DELETE_IS_SUPER)) - HeapTupleHeaderSetXmax(htup, xlrec->xmax); + { + HeapTupleSetXmax(&tuple, xlrec->xmax); + HeapTupleHeaderSetXmax(page, &tuple); + } else - HeapTupleHeaderSetXmin(htup, InvalidTransactionId); + { + HeapTupleSetXmin(&tuple, InvalidTransactionId); + HeapTupleHeaderSetXmin(page, &tuple); + } HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Mark the page as a candidate for pruning */ @@ -9084,7 +9736,7 @@ static void heap_xlog_insert(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; - xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record); + xl_heap_insert *xlrec; Buffer buffer; Page page; union @@ -9100,6 +9752,17 @@ 
heap_xlog_insert(XLogReaderState *record) BlockNumber blkno; ItemPointerData target_tid; XLogRedoAction action; + bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; + TransactionId pd_xid_base = InvalidTransactionId; + Pointer rec_data = (Pointer) XLogRecGetData(record); + + if (isinit) + { + pd_xid_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + } + + xlrec = (xl_heap_insert *) rec_data; XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno); ItemPointerSetBlockNumber(&target_tid, blkno); @@ -9124,11 +9787,12 @@ heap_xlog_insert(XLogReaderState *record) * If we inserted the first and only tuple on the page, re-initialize the * page from scratch. */ - if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) + if (isinit) { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); - PageInit(page, BufferGetPageSize(buffer), 0); + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = pd_xid_base; action = BLK_NEEDS_REDO; } else @@ -9137,6 +9801,7 @@ heap_xlog_insert(XLogReaderState *record) { Size datalen; char *data; + HeapTupleData tuple; page = BufferGetPage(buffer); @@ -9160,7 +9825,9 @@ heap_xlog_insert(XLogReaderState *record) htup->t_infomask2 = xlhdr.t_infomask2; htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; - HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); + tuple.t_data = htup; + HeapTupleSetXmin(&tuple, XLogRecGetXid(record)); + HeapTupleHeaderSetXmin(page, &tuple); HeapTupleHeaderSetCmin(htup, FirstCommandId); htup->t_ctid = target_tid; @@ -9220,12 +9887,19 @@ heap_xlog_multi_insert(XLogReaderState *record) int i; bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; XLogRedoAction action; + TransactionId pd_xid_base = InvalidTransactionId; + Pointer rec_data = (Pointer) XLogRecGetData(record); /* * Insertion doesn't overwrite MVCC data, so no conflict processing is * required. 
*/ - xlrec = (xl_heap_multi_insert *) XLogRecGetData(record); + if (isinit) + { + pd_xid_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + } + xlrec = (xl_heap_multi_insert *) rec_data; XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno); @@ -9252,7 +9926,8 @@ heap_xlog_multi_insert(XLogReaderState *record) { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); - PageInit(page, BufferGetPageSize(buffer), 0); + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = pd_xid_base; action = BLK_NEEDS_REDO; } else @@ -9273,6 +9948,7 @@ heap_xlog_multi_insert(XLogReaderState *record) { OffsetNumber offnum; xl_multi_insert_tuple *xlhdr; + HeapTupleData tuple; /* * If we're reinitializing the page, the tuples are stored in @@ -9303,7 +9979,9 @@ heap_xlog_multi_insert(XLogReaderState *record) htup->t_infomask2 = xlhdr->t_infomask2; htup->t_infomask = xlhdr->t_infomask; htup->t_hoff = xlhdr->t_hoff; - HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); + tuple.t_data = htup; + HeapTupleSetXmin(&tuple, XLogRecGetXid(record)); + HeapTupleHeaderSetXmin(page, &tuple); HeapTupleHeaderSetCmin(htup, FirstCommandId); ItemPointerSetBlockNumber(&htup->t_ctid, blkno); ItemPointerSetOffsetNumber(&htup->t_ctid, offnum); @@ -9351,7 +10029,7 @@ static void heap_xlog_update(XLogReaderState *record, bool hot_update) { XLogRecPtr lsn = record->EndRecPtr; - xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record); + xl_heap_update *xlrec; RelFileNode rnode; BlockNumber oldblk; BlockNumber newblk; @@ -9376,6 +10054,17 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) Size freespace = 0; XLogRedoAction oldaction; XLogRedoAction newaction; + bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; + TransactionId pd_xid_base = InvalidTransactionId; + Pointer rec_data = (Pointer) XLogRecGetData(record); + + if (isinit) + { + pd_xid_base = *((TransactionId *) 
rec_data); + rec_data += sizeof(TransactionId); + } + + xlrec = (xl_heap_update *) rec_data; /* initialize to keep the compiler quiet */ oldtup.t_data = NULL; @@ -9422,6 +10111,8 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) &obuffer); if (oldaction == BLK_NEEDS_REDO) { + HeapTupleData tuple; + page = BufferGetPage(obuffer); offnum = xlrec->old_offnum; if (PageGetMaxOffsetNumber(page) >= offnum) @@ -9434,6 +10125,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) oldtup.t_data = htup; oldtup.t_len = ItemIdGetLength(lp); + HeapTupleCopyBaseFromPage(&oldtup, page); htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); htup->t_infomask2 &= ~HEAP_KEYS_UPDATED; @@ -9443,7 +10135,9 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) HeapTupleHeaderClearHotUpdated(htup); fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask, &htup->t_infomask2); - HeapTupleHeaderSetXmax(htup, xlrec->old_xmax); + tuple.t_data = htup; + HeapTupleSetXmax(&tuple, xlrec->old_xmax); + HeapTupleHeaderSetXmax(page, &tuple); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Set forward chain link in t_ctid */ htup->t_ctid = newtid; @@ -9466,11 +10160,12 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) nbuffer = obuffer; newaction = oldaction; } - else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) + else if (isinit) { nbuffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(nbuffer); - PageInit(page, BufferGetPageSize(nbuffer), 0); + PageInit(page, BufferGetPageSize(nbuffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = pd_xid_base; newaction = BLK_NEEDS_REDO; } else @@ -9498,6 +10193,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) char *recdata_end; Size datalen; Size tuplen; + HeapTupleData tuple; recdata = XLogRecGetBlockData(record, 0, &datalen); recdata_end = recdata + datalen; @@ -9576,9 +10272,12 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) 
htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; - HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); + tuple.t_data = htup; + HeapTupleSetXmin(&tuple, XLogRecGetXid(record)); + HeapTupleHeaderSetXmin(page, &tuple); HeapTupleHeaderSetCmin(htup, FirstCommandId); - HeapTupleHeaderSetXmax(htup, xlrec->new_xmax); + HeapTupleSetXmax(&tuple, xlrec->new_xmax); + HeapTupleHeaderSetXmax(page, &tuple); /* Make sure there is no forward chain link in t_ctid */ htup->t_ctid = newtid; @@ -9689,6 +10388,8 @@ heap_xlog_lock(XLogReaderState *record) if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { + HeapTupleData tuple; + page = (Page) BufferGetPage(buffer); offnum = xlrec->offnum; @@ -9717,7 +10418,10 @@ heap_xlog_lock(XLogReaderState *record) BufferGetBlockNumber(buffer), offnum); } - HeapTupleHeaderSetXmax(htup, xlrec->locking_xid); + + tuple.t_data = htup; + HeapTupleSetXmax(&tuple, xlrec->locking_xid); + HeapTupleHeaderSetXmax(page, &tuple); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); PageSetLSN(page, lsn); MarkBufferDirty(buffer); @@ -9762,6 +10466,8 @@ heap_xlog_lock_updated(XLogReaderState *record) if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { + HeapTupleData tuple; + page = BufferGetPage(buffer); offnum = xlrec->offnum; @@ -9777,7 +10483,9 @@ heap_xlog_lock_updated(XLogReaderState *record) htup->t_infomask2 &= ~HEAP_KEYS_UPDATED; fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, &htup->t_infomask2); - HeapTupleHeaderSetXmax(htup, xlrec->xmax); + tuple.t_data = htup; + HeapTupleSetXmax(&tuple, xlrec->xmax); + HeapTupleHeaderSetXmax(page, &tuple); PageSetLSN(page, lsn); MarkBufferDirty(buffer); @@ -9827,6 +10535,31 @@ heap_xlog_inplace(XLogReaderState *record) UnlockReleaseBuffer(buffer); } +static void +heap_xlog_base_shift(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + xl_heap_base_shift *xlrec = (xl_heap_base_shift *) XLogRecGetData(record); + Buffer buffer; + Page 
page; + BlockNumber blkno; + RelFileNode target_node; + + XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno); + + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) + { + page = BufferGetPage(buffer); + heap_page_shift_base(NULL, InvalidBuffer, page, xlrec->multi, xlrec->delta); + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); +} + + void heap_redo(XLogReaderState *record) { @@ -9913,6 +10646,21 @@ heap2_redo(XLogReaderState *record) } } +void +heap3_redo(XLogReaderState *record) +{ + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + + switch (info & XLOG_HEAP_OPMASK) + { + case XLOG_HEAP3_BASE_SHIFT: + heap_xlog_base_shift(record); + break; + default: + elog(PANIC, "heap3_redo: unknown op code %u", info); + } +} + /* * Mask a heap page before performing consistency checks on it. */ @@ -9925,6 +10673,10 @@ heap_mask(char *pagedata, BlockNumber blkno) mask_page_lsn_and_checksum(page); mask_page_hint_bits(page); + + /* Ignore prune_xid (it's like a hint-bit) */ + HeapPageSetPruneXid(page, InvalidTransactionId); + mask_unused_space(page); for (off = 1; off <= PageGetMaxOffsetNumber(page); off++) @@ -10040,14 +10792,14 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation, case HEAPTUPLE_LIVE: if (visible) return; - xid = HeapTupleHeaderGetXmin(tuple->t_data); + xid = HeapTupleGetXmin(tuple); break; case HEAPTUPLE_RECENTLY_DEAD: case HEAPTUPLE_DELETE_IN_PROGRESS: if (visible) - xid = HeapTupleHeaderGetUpdateXid(tuple->t_data); + xid = HeapTupleGetUpdateXidAny(tuple); else - xid = HeapTupleHeaderGetXmin(tuple->t_data); + xid = HeapTupleGetXmin(tuple); if (TransactionIdPrecedes(xid, TransactionXmin)) { @@ -10057,7 +10809,7 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation, } break; case HEAPTUPLE_INSERT_IN_PROGRESS: - xid = HeapTupleHeaderGetXmin(tuple->t_data); + xid = HeapTupleGetXmin(tuple); break; case HEAPTUPLE_DEAD: Assert(!visible); 
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index dbb93b1aa91..73454f65459 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -413,7 +413,7 @@ tuple_lock_retry: * changes in an existing tuple, except to invalid or * frozen, and neither of those can match priorXmax.) */ - if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data), + if (!TransactionIdEquals(HeapTupleGetXmin(tuple), priorXmax)) { ReleaseBuffer(buffer); @@ -473,7 +473,7 @@ tuple_lock_retry: * variable instead of doing HeapTupleHeaderGetXmin again. */ if (TransactionIdIsCurrentTransactionId(priorXmax) && - HeapTupleHeaderGetCmin(tuple->t_data) >= cid) + HeapTupleGetCmin(tuple) >= cid) { tmfd->xmax = priorXmax; @@ -481,7 +481,7 @@ tuple_lock_retry: * Cmin is the problematic value, so store that. See * above. */ - tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data); + tmfd->cmax = HeapTupleGetCmin(tuple); ReleaseBuffer(buffer); return TM_SelfModified; } @@ -507,7 +507,7 @@ tuple_lock_retry: /* * As above, if xmin isn't what we're expecting, do nothing. */ - if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data), + if (!TransactionIdEquals(HeapTupleGetXmin(tuple), priorXmax)) { ReleaseBuffer(buffer); @@ -538,7 +538,7 @@ tuple_lock_retry: /* updated, so look at the updated row */ *tid = tuple->t_data->t_ctid; /* updated row should have xmin matching this xmax */ - priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data); + priorXmax = HeapTupleGetUpdateXidAny(tuple); ReleaseBuffer(buffer); /* loop back to fetch next in chain */ } @@ -858,7 +858,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, * case we had better copy it. 
*/ if (!is_system_catalog && - !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data))) + !TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(tuple))) elog(WARNING, "concurrent insert in progress within table \"%s\"", RelationGetRelationName(OldHeap)); /* treat as live */ @@ -870,7 +870,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, * Similar situation to INSERT_IN_PROGRESS case. */ if (!is_system_catalog && - !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data))) + !TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(tuple))) elog(WARNING, "concurrent delete in progress within table \"%s\"", RelationGetRelationName(OldHeap)); /* treat as recently dead */ @@ -1055,6 +1055,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, targtuple->t_tableOid = RelationGetRelid(scan->rs_rd); targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid); targtuple->t_len = ItemIdGetLength(itemid); + HeapTupleCopyBaseFromPage(targtuple, targpage); switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin, hscan->rs_cbuf)) @@ -1090,7 +1091,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, * numbers we report to the cumulative stats system to make * this come out right.) */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(targtuple))) { sample_it = true; *liverows += 1; @@ -1121,7 +1122,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, * but not the post-image. We also get sane results if the * concurrent transaction never commits. 
*/ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(targtuple))) *deadrows += 1; else { @@ -1463,7 +1464,7 @@ heapam_index_build_range_scan(Relation heapRelation, * before commit there. Give a warning if neither case * applies. */ - xwait = HeapTupleHeaderGetXmin(heapTuple->t_data); + xwait = HeapTupleGetXmin(heapTuple); if (!TransactionIdIsCurrentTransactionId(xwait)) { if (!is_system_catalog) @@ -1522,7 +1523,7 @@ heapam_index_build_range_scan(Relation heapRelation, break; } - xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data); + xwait = HeapTupleGetUpdateXidAny(heapTuple); if (!TransactionIdIsCurrentTransactionId(xwait)) { if (!is_system_catalog) @@ -2200,13 +2201,14 @@ heapam_scan_bitmap_next_block(TableScanDesc scan, loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); loctup.t_len = ItemIdGetLength(lp); loctup.t_tableOid = scan->rs_rd->rd_id; + HeapTupleCopyBaseFromPage(&loctup, dp); ItemPointerSet(&loctup.t_self, page, offnum); valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer); if (valid) { hscan->rs_vistuples[ntup++] = offnum; PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot, - HeapTupleHeaderGetXmin(loctup.t_data)); + HeapTupleGetXmin(&loctup)); } HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup, buffer, snapshot); @@ -2245,6 +2247,7 @@ heapam_scan_bitmap_next_tuple(TableScanDesc scan, hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); hscan->rs_ctup.t_len = ItemIdGetLength(lp); hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id; + HeapTupleCopyBaseFromPage(&hscan->rs_ctup, dp); ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset); pgstat_count_heap_fetch(scan->rs_rd); @@ -2385,6 +2388,7 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple->t_len = ItemIdGetLength(itemid); + 
HeapTupleCopyBaseFromPage(tuple, page); ItemPointerSet(&(tuple->t_self), blockno, tupoffset); diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c index ff0b8a688de..4112a936d1f 100644 --- a/src/backend/access/heap/heapam_visibility.c +++ b/src/backend/access/heap/heapam_visibility.c @@ -217,7 +217,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; @@ -229,7 +229,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -241,7 +241,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) return false; } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -251,11 +251,11 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) return false; } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) return false; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -284,7 +284,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return 
true; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -299,17 +299,17 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) return true; } - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; return false; } - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return true; - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -327,7 +327,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); return false; } @@ -416,7 +416,7 @@ HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, * is canceled by super-deleting the tuple. This also applies to * TOAST tuples created during speculative insertion. 
*/ - else if (!TransactionIdIsValid(HeapTupleHeaderGetXmin(tuple))) + else if (!TransactionIdIsValid(HeapTupleGetXmin(htup))) return false; } @@ -506,9 +506,9 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { - if (HeapTupleHeaderGetCmin(tuple) >= curcid) + if (HeapTupleGetCmin(htup) >= curcid) return TM_Invisible; /* inserted after scan started */ if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ @@ -518,7 +518,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, { TransactionId xmax; - xmax = HeapTupleHeaderGetRawXmax(tuple); + xmax = HeapTupleGetRawXmax(htup); /* * Careful here: even though this tuple was created by our own @@ -549,7 +549,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -557,21 +557,21 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, /* deleting subtransaction must have aborted */ if (!TransactionIdIsCurrentTransactionId(xmax)) { - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), + if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return TM_BeingModified; return TM_Ok; } else { - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -579,16 +579,16 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, return TM_Ok; } - 
if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) return TM_Invisible; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -622,17 +622,17 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) { - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true)) + if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), true)) return TM_BeingModified; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); return TM_Ok; } - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); if (!TransactionIdIsValid(xmax)) { - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return TM_BeingModified; } @@ -641,13 +641,13 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, if (TransactionIdIsCurrentTransactionId(xmax)) { - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return TM_BeingModified; if (TransactionIdDidCommit(xmax)) @@ -663,7 +663,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, * what about the other members? 
*/ - if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + if (!MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) { /* * There's no member, even just a locker, alive anymore, so we can @@ -680,20 +680,20 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, } } - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return TM_BeingModified; - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return TM_BeingModified; - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -711,7 +711,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid)) return TM_Updated; /* updated by other */ else @@ -794,7 +794,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; @@ -806,7 +806,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -818,7 +818,7 @@ 
HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, return false; } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -828,7 +828,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, return false; } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) { /* * Return the speculative token to caller. Caller can worry about @@ -844,13 +844,13 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Assert(snapshot->speculativeToken != 0); } - snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple); + snapshot->xmin = HeapTupleGetRawXmin(htup); /* XXX shouldn't we fall through to look at xmax? */ return true; /* in insertion by other */ } - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -879,7 +879,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -897,21 +897,21 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, return true; } - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; return false; } - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) { if 
(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) - snapshot->xmax = HeapTupleHeaderGetRawXmax(tuple); + snapshot->xmax = HeapTupleGetRawXmax(htup); return true; } - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -929,7 +929,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); return false; /* updated by other */ } @@ -1008,9 +1008,9 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { - if (HeapTupleHeaderGetCmin(tuple) >= snapshot->curcid) + if (HeapTupleGetCmin(htup) >= snapshot->curcid) return false; /* inserted after scan started */ if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ @@ -1023,7 +1023,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -1031,13 +1031,13 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, /* updating subtransaction must have aborted */ if (!TransactionIdIsCurrentTransactionId(xmax)) return true; - else if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + else if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* updated after scan started */ else return false; /* updated before scan started */ } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -1045,16 
+1045,16 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, return true; } - if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* deleted after scan started */ else return false; /* deleted before scan started */ } - else if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot)) + else if (XidInMVCCSnapshot(HeapTupleGetRawXmin(htup), snapshot)) return false; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -1067,7 +1067,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, { /* xmin is committed, but maybe not according to our snapshot */ if (!HeapTupleHeaderXminFrozen(tuple) && - XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot)) + XidInMVCCSnapshot(HeapTupleGetRawXmin(htup), snapshot)) return false; /* treat as still in progress */ } @@ -1086,14 +1086,14 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, /* already checked above */ Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); if (TransactionIdIsCurrentTransactionId(xmax)) { - if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* deleted after scan started */ else return false; /* deleted before scan started */ @@ -1108,18 +1108,18 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { - if (HeapTupleHeaderGetCmax(tuple) >= 
snapshot->curcid) + if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* deleted after scan started */ else return false; /* deleted before scan started */ } - if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot)) + if (XidInMVCCSnapshot(HeapTupleGetRawXmax(htup), snapshot)) return true; - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -1129,12 +1129,12 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, /* xmax transaction committed */ SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); } else { /* xmax is committed, but maybe not according to our snapshot */ - if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot)) + if (XidInMVCCSnapshot(HeapTupleGetRawXmax(htup), snapshot)) return true; /* treat as still in progress */ } @@ -1249,21 +1249,21 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de return HEAPTUPLE_DEAD; } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return HEAPTUPLE_INSERT_IN_PROGRESS; /* only locked? 
run infomask-only check first, for performance */ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) || - HeapTupleHeaderIsOnlyLocked(tuple)) + HeapTupleIsOnlyLocked(htup)) return HEAPTUPLE_INSERT_IN_PROGRESS; /* inserted and then deleted by same xact */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(htup))) return HEAPTUPLE_DELETE_IN_PROGRESS; /* deleting subtransaction must have aborted */ return HEAPTUPLE_INSERT_IN_PROGRESS; } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) { /* * It'd be possible to discern between INSERT/DELETE in progress @@ -1275,9 +1275,9 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de */ return HEAPTUPLE_INSERT_IN_PROGRESS; } - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* @@ -1319,14 +1319,14 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de * possibly be running; otherwise have to check. 
*/ if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) && - MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), + MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), true)) return HEAPTUPLE_LIVE; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); } else { - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return HEAPTUPLE_LIVE; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); @@ -1344,7 +1344,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { - TransactionId xmax = HeapTupleGetUpdateXid(tuple); + TransactionId xmax = HeapTupleGetUpdateXid(htup); /* already checked above */ Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); @@ -1367,7 +1367,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de *dead_after = xmax; return HEAPTUPLE_RECENTLY_DEAD; } - else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + else if (!MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) { /* * Not in Progress, Not Committed, so either Aborted or crashed. @@ -1381,11 +1381,11 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return HEAPTUPLE_DELETE_IN_PROGRESS; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); else { /* @@ -1407,7 +1407,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de * Deleter committed, allow caller to check if it was recent enough that * some open transactions could still see the tuple. 
*/ - *dead_after = HeapTupleHeaderGetRawXmax(tuple); + *dead_after = HeapTupleGetRawXmax(htup); return HEAPTUPLE_RECENTLY_DEAD; } @@ -1503,7 +1503,7 @@ HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest) /* Deleter committed, so tuple is dead if the XID is old enough. */ return GlobalVisTestIsRemovableXid(vistest, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); } /* @@ -1516,8 +1516,9 @@ HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest) * at the top of this file. */ bool -HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) +HeapTupleIsOnlyLocked(HeapTuple htup) { + HeapTupleHeader tuple = htup->t_data; TransactionId xmax; /* if there's no valid Xmax, then there's obviously no update either */ @@ -1528,7 +1529,7 @@ HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) return true; /* invalid xmax means no update */ - if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsValid(HeapTupleGetRawXmax(htup))) return true; /* @@ -1539,7 +1540,7 @@ HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) return false; /* ... but if it's a multi, then perhaps the updating Xid aborted. 
*/ - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -1587,8 +1588,8 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer) { HeapTupleHeader tuple = htup->t_data; - TransactionId xmin = HeapTupleHeaderGetXmin(tuple); - TransactionId xmax = HeapTupleHeaderGetRawXmax(tuple); + TransactionId xmin = HeapTupleGetXmin(htup); + TransactionId xmax = HeapTupleGetRawXmax(htup); Assert(ItemPointerIsValid(&htup->t_self)); Assert(htup->t_tableOid != InvalidOid); @@ -1688,7 +1689,7 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, */ else if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); } /* check if it's one of our txids, toplevel is also in there */ diff --git a/src/backend/access/heap/heaptoast.c b/src/backend/access/heap/heaptoast.c index 1575a81b01b..252e57cc1de 100644 --- a/src/backend/access/heap/heaptoast.c +++ b/src/backend/access/heap/heaptoast.c @@ -307,6 +307,7 @@ heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, result_tuple->t_len = new_tuple_len; result_tuple->t_self = newtup->t_self; result_tuple->t_tableOid = newtup->t_tableOid; + HeapTupleCopyBase(result_tuple, newtup); new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE); result_tuple->t_data = new_data; @@ -395,6 +396,7 @@ toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc) */ new_tuple->t_self = tup->t_self; new_tuple->t_tableOid = tup->t_tableOid; + HeapTupleCopyBase(new_tuple, tup); new_tuple->t_data->t_choice = tup->t_data->t_choice; new_tuple->t_data->t_ctid = tup->t_data->t_ctid; @@ -467,6 +469,7 @@ toast_flatten_tuple_to_datum(HeapTupleHeader tup, ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; tmptup.t_data = tup; + HeapTupleSetZeroBase(&tmptup); /* * Break down the tuple into fields. 
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index ae2e2ce37a5..6a098b03785 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -59,6 +59,11 @@ RelationPutHeapTuple(Relation relation, /* Add the tuple to the page */ pageHeader = BufferGetPage(buffer); + HeapTupleSetXmin(tuple, tuple->t_xmin); + HeapTupleHeaderSetXmin(pageHeader, tuple); + HeapTupleSetXmax(tuple, tuple->t_xmax); + HeapTupleHeaderSetXmax(pageHeader, tuple); + offnum = PageAddItem(pageHeader, (Item) tuple->t_data, tuple->t_len, InvalidOffsetNumber, false, true); @@ -243,7 +248,7 @@ RelationAddExtraBlocks(Relation relation, BulkInsertState bistate) /* we'll need this info below */ blockNum = BufferGetBlockNumber(buffer); - freespace = BufferGetPageSize(buffer) - SizeOfPageHeaderData; + freespace = BufferGetPageSize(buffer) - SizeOfPageHeaderData - MAXALIGN(sizeof(HeapPageSpecialData)); UnlockReleaseBuffer(buffer); @@ -514,6 +519,9 @@ loop: /* * Now we can check to see if there's enough free space here. If so, * we're done. + * + * "Double xmax" page is not suitable for any new tuple, since xmin + * can't be set there. 
*/ page = BufferGetPage(buffer); @@ -525,12 +533,14 @@ loop: */ if (PageIsNew(page)) { - PageInit(page, BufferGetPageSize(buffer), 0); + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; MarkBufferDirty(buffer); } pageFreeSpace = PageGetHeapFreeSpace(page); - if (targetFreeSpace <= pageFreeSpace) + if (targetFreeSpace <= pageFreeSpace && + !HeapPageIsDoubleXmax(page)) { /* use this page as future insert target, too */ RelationSetTargetBlock(relation, targetBlock); @@ -635,7 +645,8 @@ loop: BufferGetBlockNumber(buffer), RelationGetRelationName(relation)); - PageInit(page, BufferGetPageSize(buffer), 0); + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; MarkBufferDirty(buffer); /* diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 9f43bbe25f5..f494bc4430d 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -136,7 +136,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer) * determining the appropriate horizon is a waste if there's no prune_xid * (i.e. no updates/deletes left potentially dead tuples around). */ - prune_xid = ((PageHeader) page)->pd_prune_xid; + prune_xid = HeapPageGetPruneXidNoAssert(page); if (!TransactionIdIsValid(prune_xid)) return; @@ -207,7 +207,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer) nnewlpdead; ndeleted = heap_page_prune(relation, buffer, vistest, limited_xmin, - limited_ts, &nnewlpdead, NULL); + limited_ts, &nnewlpdead, NULL, true); /* * Report the number of tuples reclaimed to pgstats. 
This is @@ -268,7 +268,8 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId old_snap_xmin, TimestampTz old_snap_ts, int *nnewlpdead, - OffsetNumber *off_loc) + OffsetNumber *off_loc, + bool repairFragmentation) { int ndeleted = 0; Page page = BufferGetPage(buffer); @@ -339,6 +340,7 @@ heap_page_prune(Relation relation, Buffer buffer, htup = (HeapTupleHeader) PageGetItem(page, itemid); tup.t_data = htup; tup.t_len = ItemIdGetLength(itemid); + HeapTupleCopyBaseFromPage(&tup, page); ItemPointerSet(&(tup.t_self), blockno, offnum); /* @@ -393,13 +395,15 @@ heap_page_prune(Relation relation, Buffer buffer, heap_page_prune_execute(buffer, prstate.redirected, prstate.nredirected, prstate.nowdead, prstate.ndead, - prstate.nowunused, prstate.nunused); + prstate.nowunused, prstate.nunused, + repairFragmentation); /* * Update the page's pd_prune_xid field to either zero, or the lowest * XID of any soon-prunable tuple. */ - ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid; + if (XidFitsPage(page, prstate.new_prune_xid)) + HeapPageSetPruneXid(page, prstate.new_prune_xid); /* * Also clear the "page is full" flag, since there's no point in @@ -461,10 +465,10 @@ heap_page_prune(Relation relation, Buffer buffer, * point in repeating the prune/defrag process until something else * happens to the page. 
*/ - if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid || + if (HeapPageGetPruneXid(page) != prstate.new_prune_xid || PageIsFull(page)) { - ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid; + HeapPageSetPruneXid(page, prstate.new_prune_xid); PageClearFull(page); MarkBufferDirtyHint(buffer, true); } @@ -601,6 +605,9 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) OffsetNumber chainitems[MaxHeapTuplesPerPage]; int nchain = 0, i; + HeapTupleData tup; + + tup.t_tableOid = RelationGetRelid(prstate->rel); rootlp = PageGetItemId(dp, rootoffnum); @@ -612,6 +619,11 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) Assert(prstate->htsv[rootoffnum] != -1); htup = (HeapTupleHeader) PageGetItem(dp, rootlp); + tup.t_data = htup; + tup.t_len = ItemIdGetLength(rootlp); + ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), rootoffnum); + HeapTupleCopyBaseFromPage(&tup, dp); + if (HeapTupleHeaderIsHeapOnly(htup)) { /* @@ -636,7 +648,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) !HeapTupleHeaderIsHotUpdated(htup)) { heap_prune_record_unused(prstate, rootoffnum); - HeapTupleHeaderAdvanceLatestRemovedXid(htup, + HeapTupleHeaderAdvanceLatestRemovedXid(&tup, &prstate->latestRemovedXid); ndeleted++; } @@ -703,11 +715,16 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) Assert(prstate->htsv[offnum] != -1); htup = (HeapTupleHeader) PageGetItem(dp, lp); + tup.t_data = htup; + tup.t_len = ItemIdGetLength(lp); + HeapTupleCopyBaseFromPage(&tup, dp); + ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum); + /* * Check the tuple XMIN against prior XMAX, if any */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax)) + !TransactionIdEquals(HeapTupleGetXmin(&tup), priorXmax)) break; /* @@ -734,7 +751,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState 
*prstate) * that the page is reconsidered for pruning in future. */ heap_prune_record_prunable(prstate, - HeapTupleHeaderGetUpdateXid(htup)); + HeapTupleGetUpdateXidAny(&tup)); break; case HEAPTUPLE_DELETE_IN_PROGRESS: @@ -744,7 +761,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) * that the page is reconsidered for pruning in future. */ heap_prune_record_prunable(prstate, - HeapTupleHeaderGetUpdateXid(htup)); + HeapTupleGetUpdateXidAny(&tup)); break; case HEAPTUPLE_LIVE: @@ -773,7 +790,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) if (tupdead) { latestdead = offnum; - HeapTupleHeaderAdvanceLatestRemovedXid(htup, + HeapTupleHeaderAdvanceLatestRemovedXid(&tup, &prstate->latestRemovedXid); } else if (!recent_dead) @@ -795,7 +812,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate) Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == BufferGetBlockNumber(buffer)); offnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + priorXmax = HeapTupleGetUpdateXidAny(&tup); } /* @@ -912,7 +929,8 @@ void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, - OffsetNumber *nowunused, int nunused) + OffsetNumber *nowunused, int nunused, + bool repairFragmentation) { Page page = (Page) BufferGetPage(buffer); OffsetNumber *offnum; @@ -1036,7 +1054,8 @@ heap_page_prune_execute(Buffer buffer, * Finally, repair any fragmentation, and update the page's hint bit about * whether it has free pointers. 
*/ - PageRepairFragmentation(page); + if (repairFragmentation) + PageRepairFragmentation(page); /* * Now that the page has been modified, assert that redirect items still @@ -1123,6 +1142,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) HeapTupleHeader htup; OffsetNumber nextoffnum; TransactionId priorXmax; + HeapTupleData tup; /* skip unused and dead items */ if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp)) @@ -1131,6 +1151,8 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) if (ItemIdIsNormal(lp)) { htup = (HeapTupleHeader) PageGetItem(page, lp); + tup.t_data = htup; + HeapTupleCopyBaseFromPage(&tup, page); /* * Check if this tuple is part of a HOT-chain rooted at some other @@ -1152,7 +1174,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) /* Set up to scan the HOT-chain */ nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + priorXmax = HeapTupleGetUpdateXidAny(&tup); } else { @@ -1191,9 +1213,11 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) break; htup = (HeapTupleHeader) PageGetItem(page, lp); + tup.t_data = htup; + HeapTupleCopyBaseFromPage(&tup, page); if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup))) + !TransactionIdEquals(priorXmax, HeapTupleGetXmin(&tup))) break; /* Remember the root line pointer for this item */ @@ -1207,7 +1231,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup)); nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + priorXmax = HeapTupleGetUpdateXidAny(&tup); } } } diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 2a53826736e..3b9dc2fb3e8 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -379,6 +379,7 @@ rewrite_heap_tuple(RewriteState state, 
&old_tuple->t_data->t_choice.t_heap, sizeof(HeapTupleFields)); + HeapTupleCopyBase(new_tuple, old_tuple); new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK; new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK; new_tuple->t_data->t_infomask |= @@ -388,7 +389,7 @@ rewrite_heap_tuple(RewriteState state, * While we have our hands on the tuple, we may as well freeze any * eligible xmin or xmax, so that future VACUUM effort can be saved. */ - heap_freeze_tuple(new_tuple->t_data, + heap_freeze_tuple(new_tuple, state->rs_old_rel->rd_rel->relfrozenxid, state->rs_old_rel->rd_rel->relminmxid, state->rs_freeze_xid, @@ -404,7 +405,7 @@ rewrite_heap_tuple(RewriteState state, * If the tuple has been updated, check the old-to-new mapping hash table. */ if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) || - HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) && + HeapTupleIsOnlyLocked(old_tuple)) && !HeapTupleHeaderIndicatesMovedPartitions(old_tuple->t_data) && !(ItemPointerEquals(&(old_tuple->t_self), &(old_tuple->t_data->t_ctid)))) @@ -412,7 +413,7 @@ rewrite_heap_tuple(RewriteState state, OldToNewMapping mapping; memset(&hashkey, 0, sizeof(hashkey)); - hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data); + hashkey.xmin = HeapTupleGetUpdateXidAny(old_tuple); hashkey.tid = old_tuple->t_data->t_ctid; mapping = (OldToNewMapping) @@ -485,7 +486,7 @@ rewrite_heap_tuple(RewriteState state, * RECENTLY_DEAD if and only if the xmin is not before OldestXmin. 
*/ if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(new_tuple->t_data), + !TransactionIdPrecedes(HeapTupleGetXmin(new_tuple), state->rs_oldest_xmin)) { /* @@ -494,7 +495,7 @@ UnresolvedTup unresolved; memset(&hashkey, 0, sizeof(hashkey)); - hashkey.xmin = HeapTupleHeaderGetXmin(new_tuple->t_data); + hashkey.xmin = HeapTupleGetXmin(new_tuple); hashkey.tid = old_tid; unresolved = hash_search(state->rs_unresolved_tups, &hashkey, @@ -582,7 +583,7 @@ rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple) bool found; memset(&hashkey, 0, sizeof(hashkey)); - hashkey.xmin = HeapTupleHeaderGetXmin(old_tuple->t_data); + hashkey.xmin = HeapTupleGetXmin(old_tuple); hashkey.tid = old_tuple->t_self; unresolved = hash_search(state->rs_unresolved_tups, &hashkey, @@ -618,6 +619,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup) Size len; OffsetNumber newoff; HeapTuple heaptup; + TransactionId xmin; + bool immutable_tuple; /* * If the new tuple is too big for storage or contains already toasted @@ -652,9 +655,19 @@ raw_heap_insert(RewriteState state, HeapTuple tup) len = MAXALIGN(heaptup->t_len); /* be conservative */ /* - * If we're gonna fail for oversize tuple, do it right away + * Due to the update to 64-bit xids, the maximum plain tuple size was decreased + * by the addition of PageSpecial to heap pages. Pages with a tuple that became + * too large to fit should remain in Double Xmax format (read only). Inserting plain tuples with + * size over the new MaxHeapTupleSize is prohibited anyway, but vacuum full will + * transfer such a page to the rebuilt relation unmodified. + */ + immutable_tuple = len <= MaxHeapTupleSize_32 && len > MaxHeapTupleSize; + + /* + * If we're gonna fail for oversize tuple, do it right away. But allow processing + * of an immutable_tuple (see above). 
*/ - if (len > MaxHeapTupleSize) + if (len > MaxHeapTupleSize && !immutable_tuple) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("row is too big: size %zu, maximum size %zu", @@ -703,10 +716,34 @@ raw_heap_insert(RewriteState state, HeapTuple tup) if (!state->rs_buffer_valid) { /* Initialize a new empty page */ - PageInit(page, BLCKSZ, 0); + if (immutable_tuple) + /* Initialize DoubleXmax page */ + PageInit(page, BLCKSZ, 0); + else + PageInit(page, BLCKSZ, sizeof(HeapPageSpecialData)); state->rs_buffer_valid = true; } + rewrite_page_prepare_for_xid(page, heaptup); + + /* + * Tuple with HEAP_XMIN_FROZEN in t_infomask should have xmin set + * to FrozenTransactionId to avoid these tuples be treated like normal. + */ + xmin = HeapTupleGetXmin(heaptup); + HeapTupleSetXmin(heaptup, xmin); + + /* + * Tuples on DoubleXmax page could not appear modified after they had been + * frozen by pg_upgrade. Just check this to be safe. + */ + Assert(!immutable_tuple || xmin == FrozenTransactionId); + + if (!immutable_tuple) + HeapTupleHeaderSetXmin(page, heaptup); + + HeapTupleHeaderSetXmax(page, heaptup); + /* And now we can insert the tuple into the page */ newoff = PageAddItem(page, (Item) heaptup->t_data, heaptup->t_len, InvalidOffsetNumber, false, true); @@ -999,7 +1036,10 @@ logical_rewrite_log_mapping(RewriteState state, TransactionId xid, "pg_logical/mappings/" LOGICAL_REWRITE_FORMAT, dboid, relid, LSN_FORMAT_ARGS(state->rs_begin_lsn), - xid, GetCurrentTransactionId()); + (uint32) (xid >> 32), + (uint32) xid, + (uint32) (GetCurrentTransactionId() >> 32), + (uint32) GetCurrentTransactionId()); dlist_init(&src->mappings); src->num_mappings = 0; @@ -1048,9 +1088,9 @@ logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid, if (!state->rs_logical_rewrite) return; - xmin = HeapTupleHeaderGetXmin(new_tuple->t_data); + xmin = HeapTupleGetXmin(new_tuple); /* use *GetUpdateXid to correctly deal with multixacts */ - xmax = 
HeapTupleHeaderGetUpdateXid(new_tuple->t_data); + xmax = HeapTupleGetUpdateXidAny(new_tuple); /* * Log the mapping iff the tuple has been created recently. @@ -1121,7 +1161,10 @@ heap_xlog_logical_rewrite(XLogReaderState *r) "pg_logical/mappings/" LOGICAL_REWRITE_FORMAT, xlrec->mapped_db, xlrec->mapped_rel, LSN_FORMAT_ARGS(xlrec->start_lsn), - xlrec->mapped_xid, XLogRecGetXid(r)); + (uint32) (xlrec->mapped_xid >> 32), + (uint32) xlrec->mapped_xid, + (uint32) (XLogRecGetXid(r) >> 32), + (uint32) XLogRecGetXid(r)); fd = OpenTransientFile(path, O_CREAT | O_WRONLY | PG_BINARY); @@ -1217,10 +1260,12 @@ CheckPointLogicalRewriteHeap(void) Oid dboid; Oid relid; XLogRecPtr lsn; - TransactionId rewrite_xid; - TransactionId create_xid; - uint32 hi, - lo; + uint32 lsn_hi, + lsn_lo, + rewrite_xid_hi, + rewrite_xid_lo, + create_xid_hi, + create_xid_lo; if (strcmp(mapping_de->d_name, ".") == 0 || strcmp(mapping_de->d_name, "..") == 0) @@ -1235,10 +1280,12 @@ CheckPointLogicalRewriteHeap(void) continue; if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, - &dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6) + &dboid, &relid, &lsn_hi, &lsn_lo, + &rewrite_xid_hi, &rewrite_xid_lo, + &create_xid_hi, &create_xid_lo) != 8) elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name); - lsn = ((uint64) hi) << 32 | lo; + lsn = ((uint64) lsn_hi) << 32 | lsn_lo; if (lsn < cutoff || cutoff == InvalidXLogRecPtr) { diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 1171616ba64..140e2c77399 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -265,7 +265,6 @@ static bool lazy_vacuum_all_indexes(LVRelState *vacrel); static void lazy_vacuum_heap_rel(LVRelState *vacrel); static int lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, int index, Buffer *vmbuffer); -static bool lazy_check_wraparound_failsafe(LVRelState *vacrel); static void lazy_cleanup_all_indexes(LVRelState 
*vacrel); static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, @@ -524,7 +523,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * ensure that parallel VACUUM won't be attempted at all when relfrozenxid * is already dangerously old.) */ - lazy_check_wraparound_failsafe(vacrel); dead_items_alloc(vacrel, params->nworkers); /* @@ -641,7 +639,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, WalUsage walusage; StringInfoData buf; char *msgfmt; - int32 diff; + int64 diff; int64 PageHitOp = VacuumPageHit - StartPageHit, PageMissOp = VacuumPageMiss - StartPageMiss, PageDirtyOp = VacuumPageDirty - StartPageDirty; @@ -694,32 +692,35 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, orig_rel_pages == 0 ? 100.0 : 100.0 * vacrel->scanned_pages / orig_rel_pages); appendStringInfo(&buf, - _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"), + _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable, oldest xmin: %llu\n"), (long long) vacrel->tuples_deleted, (long long) vacrel->new_rel_tuples, - (long long) vacrel->recently_dead_tuples); + (long long) vacrel->recently_dead_tuples, + (unsigned long long) OldestXmin); if (vacrel->missed_dead_tuples > 0) appendStringInfo(&buf, _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"), (long long) vacrel->missed_dead_tuples, vacrel->missed_dead_pages); - diff = (int32) (ReadNextTransactionId() - OldestXmin); + diff = (int64) (ReadNextTransactionId() - OldestXmin); appendStringInfo(&buf, - _("removable cutoff: %llu, which was %d XIDs old when operation ended\n"), - (unsigned long long) OldestXmin, diff); + _("removable cutoff: %llu, which was %lld XIDs old when operation ended\n"), + (unsigned long long) OldestXmin, (long long) diff); if (frozenxid_updated) { - diff = (int32) (vacrel->NewRelfrozenXid - vacrel->relfrozenxid); + diff = (int64) (vacrel->NewRelfrozenXid - vacrel->relfrozenxid); 
appendStringInfo(&buf, - _("new relfrozenxid: %llu, which is %d XIDs ahead of previous value\n"), - (unsigned long long) vacrel->NewRelfrozenXid, diff); + _("new relfrozenxid: %llu, which is %lld XIDs ahead of previous value\n"), + (unsigned long long) vacrel->NewRelfrozenXid, + (long long) diff); } if (minmulti_updated) { - diff = (int32) (vacrel->NewRelminMxid - vacrel->relminmxid); + diff = (int64) (vacrel->NewRelminMxid - vacrel->relminmxid); appendStringInfo(&buf, - _("new relminmxid: %llu, which is %d MXIDs ahead of previous value\n"), - (unsigned long long) vacrel->NewRelminMxid, diff); + _("new relminmxid: %llu, which is %lld MXIDs ahead of previous value\n"), + (unsigned long long) vacrel->NewRelminMxid, + (long long) diff); } if (vacrel->do_index_vacuuming) { @@ -923,7 +924,6 @@ lazy_scan_heap(LVRelState *vacrel) */ if (blkno - next_failsafe_block >= FAILSAFE_EVERY_PAGES) { - lazy_check_wraparound_failsafe(vacrel); next_failsafe_block = blkno; } @@ -1443,7 +1443,9 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0) { - freespace = BLCKSZ - SizeOfPageHeaderData; + freespace = BufferGetPageSize(buf) + - SizeOfPageHeaderData + - sizeof(HeapPageSpecialData); RecordPageWithFreeSpace(vacrel->rel, blkno, freespace); } @@ -1547,6 +1549,7 @@ lazy_scan_prune(LVRelState *vacrel, maxoff; ItemId itemid; HeapTupleData tuple; + HeapTupleHeader htup; HTSV_Result res; int tuples_deleted, lpdead_items, @@ -1589,7 +1592,7 @@ retry: */ tuples_deleted = heap_page_prune(rel, buf, vacrel->vistest, InvalidTransactionId, 0, &nnewlpdead, - &vacrel->offnum); + &vacrel->offnum, true); /* * Now scan the page to collect LP_DEAD items and check for tuples @@ -1655,6 +1658,7 @@ retry: tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyBaseFromPage(&tuple, page); /* * DEAD tuples are almost always pruned into 
LP_DEAD line pointers by @@ -1718,7 +1722,7 @@ retry: * The inserter definitely committed. But is it old enough * that everyone sees it as committed? */ - xmin = HeapTupleHeaderGetXmin(tuple.t_data); + xmin = HeapTupleGetXmin(&tuple); if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin)) { prunestate->all_visible = false; @@ -1774,7 +1778,7 @@ retry: * now. */ prunestate->hastup = true; /* page makes rel truncation unsafe */ - if (heap_prepare_freeze_tuple(tuple.t_data, + if (heap_prepare_freeze_tuple(&tuple, vacrel->relfrozenxid, vacrel->relminmxid, vacrel->FreezeLimit, @@ -1828,12 +1832,9 @@ retry: /* execute collected freezes */ for (int i = 0; i < nfrozen; i++) { - HeapTupleHeader htup; - itemid = PageGetItemId(page, frozen[i].offset); htup = (HeapTupleHeader) PageGetItem(page, itemid); - - heap_execute_freeze_tuple(htup, &frozen[i]); + heap_execute_freeze_tuple_page(page, htup, &frozen[i]); } /* Now WAL-log freezing if necessary */ @@ -1952,7 +1953,6 @@ lazy_scan_noprune(LVRelState *vacrel, live_tuples, recently_dead_tuples, missed_dead_tuples; - HeapTupleHeader tupleheader; TransactionId NewRelfrozenXid = vacrel->NewRelfrozenXid; MultiXactId NewRelminMxid = vacrel->NewRelminMxid; OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; @@ -1998,8 +1998,13 @@ lazy_scan_noprune(LVRelState *vacrel, } *hastup = true; /* page prevents rel truncation */ - tupleheader = (HeapTupleHeader) PageGetItem(page, itemid); - if (heap_tuple_would_freeze(tupleheader, + ItemPointerSet(&(tuple.t_self), blkno, offnum); + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(vacrel->rel); + HeapTupleCopyBaseFromPage(&tuple, page); + + if (heap_tuple_would_freeze(&tuple, vacrel->FreezeLimit, vacrel->MultiXactCutoff, &NewRelfrozenXid, &NewRelminMxid)) @@ -2032,11 +2037,6 @@ lazy_scan_noprune(LVRelState *vacrel, */ } - ItemPointerSet(&(tuple.t_self), blkno, offnum); - tuple.t_data = (HeapTupleHeader) 
PageGetItem(page, itemid); - tuple.t_len = ItemIdGetLength(itemid); - tuple.t_tableOid = RelationGetRelid(vacrel->rel); - switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->OldestXmin, buf)) { case HEAPTUPLE_DELETE_IN_PROGRESS: @@ -2306,13 +2306,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) Assert(vacrel->do_index_vacuuming); Assert(vacrel->do_index_cleanup); - /* Precheck for XID wraparound emergencies */ - if (lazy_check_wraparound_failsafe(vacrel)) - { - /* Wraparound emergency -- don't even start an index scan */ - return false; - } - /* Report that we are now vacuuming indexes */ pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_VACUUM_INDEX); @@ -2327,13 +2320,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) vacrel->indstats[idx] = lazy_vacuum_one_index(indrel, istat, vacrel->old_live_tuples, vacrel); - - if (lazy_check_wraparound_failsafe(vacrel)) - { - /* Wraparound emergency -- end current index scan */ - allindexes = false; - break; - } } } else @@ -2341,13 +2327,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) /* Outsource everything to parallel variant */ parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, vacrel->old_live_tuples, vacrel->num_index_scans); - - /* - * Do a postcheck to consider applying wraparound failsafe now. Note - * that parallel VACUUM only gets the precheck and this postcheck. - */ - if (lazy_check_wraparound_failsafe(vacrel)) - allindexes = false; } /* @@ -2593,58 +2572,6 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, return index; } -/* - * Trigger the failsafe to avoid wraparound failure when vacrel table has a - * relfrozenxid and/or relminmxid that is dangerously far in the past. - * Triggering the failsafe makes the ongoing VACUUM bypass any further index - * vacuuming and heap vacuuming. Truncating the heap is also bypassed. - * - * Any remaining work (work that VACUUM cannot just bypass) is typically sped - * up when the failsafe triggers. 
VACUUM stops applying any cost-based delay - * that it started out with. - * - * Returns true when failsafe has been triggered. - */ -static bool -lazy_check_wraparound_failsafe(LVRelState *vacrel) -{ - Assert(TransactionIdIsNormal(vacrel->relfrozenxid)); - Assert(MultiXactIdIsValid(vacrel->relminmxid)); - - /* Don't warn more than once per VACUUM */ - if (vacrel->failsafe_active) - return true; - - if (unlikely(vacuum_xid_failsafe_check(vacrel->relfrozenxid, - vacrel->relminmxid))) - { - vacrel->failsafe_active = true; - - /* Disable index vacuuming, index cleanup, and heap rel truncation */ - vacrel->do_index_vacuuming = false; - vacrel->do_index_cleanup = false; - vacrel->do_rel_truncate = false; - - ereport(WARNING, - (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans", - get_database_name(MyDatabaseId), - vacrel->relnamespace, - vacrel->relname, - vacrel->num_index_scans), - errdetail("The table's relfrozenxid or relminmxid is too far in the past."), - errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n" - "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs."))); - - /* Stop applying cost limits from this point on */ - VacuumCostActive = false; - VacuumCostBalance = 0; - - return true; - } - - return false; -} - /* * lazy_cleanup_all_indexes() -- cleanup all indexes of relation. */ @@ -3272,6 +3199,7 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(vacrel->rel); + HeapTupleCopyBaseFromPage(&tuple, page); switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->OldestXmin, buf)) { @@ -3291,7 +3219,7 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, * The inserter definitely committed. But is it old enough * that everyone sees it as committed? 
*/ - xmin = HeapTupleHeaderGetXmin(tuple.t_data); + xmin = HeapTupleGetXmin(&tuple); if (!TransactionIdPrecedes(xmin, vacrel->OldestXmin)) { all_visible = false; @@ -3305,7 +3233,7 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, /* Check whether this tuple is already frozen or not */ if (all_visible && *all_frozen && - heap_tuple_needs_eventual_freeze(tuple.t_data)) + heap_tuple_needs_eventual_freeze(&tuple)) *all_frozen = false; } break; diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 20adb602a4d..9e8b59bc1dc 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -546,6 +546,7 @@ _bt_getroot(Relation rel, int access) rootblkno = rootopaque->btpo_next; } + /* Note: can't check btpo_level on deleted pages */ if (rootopaque->btpo_level != rootlevel) elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u", rootblkno, RelationGetRelationName(rel), @@ -649,6 +650,7 @@ _bt_gettrueroot(Relation rel) rootblkno = rootopaque->btpo_next; } + /* Note: can't check btpo_level on deleted pages */ if (rootopaque->btpo_level != rootlevel) elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u", rootblkno, RelationGetRelationName(rel), diff --git a/src/backend/access/nbtree/nbtsplitloc.c b/src/backend/access/nbtree/nbtsplitloc.c index 241e26d3385..c712ee645f0 100644 --- a/src/backend/access/nbtree/nbtsplitloc.c +++ b/src/backend/access/nbtree/nbtsplitloc.c @@ -140,6 +140,7 @@ _bt_findsplitloc(Relation rel, olddataitemstoleft, perfectpenalty, leaffillfactor; + int maxTupleEnd PG_USED_FOR_ASSERTS_ONLY; FindSplitData state; FindSplitStrat strategy; ItemId itemid; @@ -153,6 +154,7 @@ _bt_findsplitloc(Relation rel, opaque = BTPageGetOpaque(origpage); maxoff = PageGetMaxOffsetNumber(origpage); + maxTupleEnd = ItemIdGetTupleEnd(PageGetItemId(origpage, P_HIKEY)); /* Total free space available on a btree page, after fixed overhead */ leftspace = rightspace = @@ -214,6 
+216,18 @@ _bt_findsplitloc(Relation rel, itemid = PageGetItemId(origpage, offnum); itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData); +#ifdef USE_ASSERT_CHECKING + + /* + * Ending of rightmost tuple on a page can be shifted relative to left + * boundary of BTPageOpaqueData due to conversion from EE96, which + * used different BTPageOpaqueData layout. It is only checked in the + * assert below. + */ + if (maxTupleEnd < ItemIdGetTupleEnd(itemid)) + maxTupleEnd = ItemIdGetTupleEnd(itemid); +#endif + /* * When item offset number is not newitemoff, neither side of the * split can be newitem. Record a split after the previous data item @@ -248,7 +262,7 @@ _bt_findsplitloc(Relation rel, * (Though only when it's possible that newitem will end up alone on new * right page.) */ - Assert(olddataitemstoleft == olddataitemstotal); + Assert(olddataitemstoleft + ((PageHeader) origpage)->pd_special - maxTupleEnd == olddataitemstotal); if (newitemoff > maxoff) _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0); diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index f9186ca233a..ff93328c2e4 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -15,6 +15,8 @@ #include "postgres.h" #include "access/bufmask.h" +#include "access/heapam_xlog.h" +#include "access/htup_details.h" #include "access/nbtree.h" #include "access/nbtxlog.h" #include "access/transam.h" diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c index 27f949a38d0..1d96601da55 100644 --- a/src/backend/access/rmgrdesc/gistdesc.c +++ b/src/backend/access/rmgrdesc/gistdesc.c @@ -29,7 +29,7 @@ out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec) appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid %llu", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, xlrec->block, - (unsigned long long) U64FromFullTransactionId(xlrec->latestRemovedFullXid)); + 
(unsigned long long) XidFromFullTransactionId(xlrec->latestRemovedFullXid)); } static void @@ -51,7 +51,7 @@ static void out_gistxlogPageDelete(StringInfo buf, gistxlogPageDelete *xlrec) { appendStringInfo(buf, "deleteXid %llu; downlink %u", - (unsigned long long) U64FromFullTransactionId(xlrec->deleteXid), + (unsigned long long) XidFromFullTransactionId(xlrec->deleteXid), xlrec->downlinkOffset); } diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c index f3fb61971f5..2c01d05ba83 100644 --- a/src/backend/access/rmgrdesc/heapdesc.c +++ b/src/backend/access/rmgrdesc/heapdesc.c @@ -182,6 +182,23 @@ heap2_desc(StringInfo buf, XLogReaderState *record) } } +void +heap3_desc(StringInfo buf, XLogReaderState *record) +{ + char *rec = XLogRecGetData(record); + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + + info &= XLOG_HEAP_OPMASK; + if (info == XLOG_HEAP3_BASE_SHIFT) + { + xl_heap_base_shift *xlrec = (xl_heap_base_shift *) rec; + + appendStringInfo(buf, "%s delta %lld ", + xlrec->multi ? 
"MultiXactId" : "XactId", + (long long) xlrec->delta); + } +} + const char * heap_identify(uint8 info) { @@ -265,3 +282,18 @@ heap2_identify(uint8 info) return id; } + +const char * +heap3_identify(uint8 info) +{ + const char *id = NULL; + + switch (info & ~XLR_INFO_MASK) + { + case XLOG_HEAP3_BASE_SHIFT: + id = "BASE_SHIFT"; + break; + } + + return id; +} diff --git a/src/backend/access/rmgrdesc/mxactdesc.c b/src/backend/access/rmgrdesc/mxactdesc.c index 950f9269f76..b9c1826770f 100644 --- a/src/backend/access/rmgrdesc/mxactdesc.c +++ b/src/backend/access/rmgrdesc/mxactdesc.c @@ -65,9 +65,9 @@ multixact_desc(StringInfo buf, XLogReaderState *record) xl_multixact_create *xlrec = (xl_multixact_create *) rec; int i; - appendStringInfo(buf, "%llu offset %u nmembers %d: ", + appendStringInfo(buf, "%llu offset %llu nmembers %d: ", (unsigned long long) xlrec->mid, - xlrec->moff, xlrec->nmembers); + (unsigned long long) xlrec->moff, xlrec->nmembers); for (i = 0; i < xlrec->nmembers; i++) out_member(buf, &xlrec->members[i]); } @@ -75,10 +75,11 @@ multixact_desc(StringInfo buf, XLogReaderState *record) { xl_multixact_truncate *xlrec = (xl_multixact_truncate *) rec; - appendStringInfo(buf, "offsets [%llu, %llu), members [%u, %u)", + appendStringInfo(buf, "offsets [%llu, %llu), members [%llu, %llu)", (unsigned long long) xlrec->startTruncOff, (unsigned long long) xlrec->endTruncOff, - xlrec->startTruncMemb, xlrec->endTruncMemb); + (unsigned long long) xlrec->startTruncMemb, + (unsigned long long) xlrec->endTruncMemb); } } diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c index cd980053ab7..fdf307a565b 100644 --- a/src/backend/access/rmgrdesc/nbtdesc.c +++ b/src/backend/access/rmgrdesc/nbtdesc.c @@ -83,7 +83,7 @@ btree_desc(StringInfo buf, XLogReaderState *record) appendStringInfo(buf, "left %u; right %u; level %u; safexid %llu; ", xlrec->leftsib, xlrec->rightsib, xlrec->level, - (unsigned long long) 
U64FromFullTransactionId(xlrec->safexid)); + (unsigned long long) XidFromFullTransactionId(xlrec->safexid)); appendStringInfo(buf, "leafleft %u; leafright %u; leaftopparent %u", xlrec->leafleftsib, xlrec->leafrightsib, xlrec->leaftopparent); @@ -103,7 +103,7 @@ btree_desc(StringInfo buf, XLogReaderState *record) appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %llu", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, - (unsigned long long) U64FromFullTransactionId(xlrec->latestRemovedFullXid)); + (unsigned long long) XidFromFullTransactionId(xlrec->latestRemovedFullXid)); break; } case XLOG_BTREE_META_CLEANUP: diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c index eba6c67c11b..a92d47adbc7 100644 --- a/src/backend/access/rmgrdesc/xactdesc.c +++ b/src/backend/access/rmgrdesc/xactdesc.c @@ -110,7 +110,8 @@ ParseCommitRecord(uint8 info, xl_xact_commit *xlrec, xl_xact_parsed_commit *pars { xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data; - parsed->twophase_xid = xl_twophase->xid; + parsed->twophase_xid = + ((uint64) xl_twophase->xid_hi << 32) | xl_twophase->xid_lo; data += sizeof(xl_xact_twophase); @@ -205,7 +206,8 @@ ParseAbortRecord(uint8 info, xl_xact_abort *xlrec, xl_xact_parsed_abort *parsed) { xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data; - parsed->twophase_xid = xl_twophase->xid; + parsed->twophase_xid = + ((uint64) xl_twophase->xid_hi << 32) | xl_twophase->xid_lo; data += sizeof(xl_xact_twophase); diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index 230b85fc698..5758a872155 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -45,7 +45,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record) CheckPoint *checkpoint = (CheckPoint *) rec; appendStringInfo(buf, "redo %X/%X; " - "tli %u; prev tli %u; fpw %s; xid %llu; oid %u; multi %llu; offset %u; " + "tli %u; prev tli %u; fpw %s; xid %llu; oid 
%u; multi %llu; offset %llu; " "oldest xid %llu in DB %u; oldest multi %llu in DB %u; " "oldest/newest commit timestamp xid: %llu/%llu; " "oldest running xid %llu; %s", @@ -53,10 +53,10 @@ xlog_desc(StringInfo buf, XLogReaderState *record) checkpoint->ThisTimeLineID, checkpoint->PrevTimeLineID, checkpoint->fullPageWrites ? "true" : "false", - (unsigned long long) U64FromFullTransactionId(checkpoint->nextXid), + (unsigned long long) XidFromFullTransactionId(checkpoint->nextXid), checkpoint->nextOid, (unsigned long long) checkpoint->nextMulti, - checkpoint->nextMultiOffset, + (unsigned long long) checkpoint->nextMultiOffset, (unsigned long long) checkpoint->oldestXid, checkpoint->oldestXidDB, (unsigned long long) checkpoint->oldestMulti, diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 46cb69ece07..7fa9a618bc7 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -713,7 +713,7 @@ void BootStrapCLOG(void) { int slotno; - int pageno; + int64 pageno; LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); @@ -727,7 +727,10 @@ BootStrapCLOG(void) pageno = TransactionIdToPage(XidFromFullTransactionId(ShmemVariableCache->nextXid)); if (pageno != 0) { + /* Create and zero the first page of the commit log */ slotno = ZeroCLOGPage(pageno, false); + + /* Make sure it's written out */ SimpleLruWritePage(XactCtl, slotno); Assert(!XactCtl->shared->page_dirty[slotno]); } @@ -921,24 +924,11 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid) SimpleLruTruncate(XactCtl, cutoffPage); } - /* * Decide whether a CLOG page number is "older" for truncation purposes. * - * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. However, TransactionIdPrecedes() - * would get weird about permanent xact IDs. 
So, offset both such that xid1, - * xid2, and xid2 + CLOG_XACTS_PER_PAGE - 1 are all normal XIDs; this offset - * is relevant to page 0 and to the page preceding page 0. - * - * The page containing oldestXact-2^31 is the important edge case. The - * portion of that page equaling or following oldestXact-2^31 is expendable, - * but the portion preceding oldestXact-2^31 is not. When oldestXact-2^31 is - * the first XID of a page and segment, the entire page and segment is - * expendable, and we could truncate the segment. Recognizing that case would - * require making oldestXact, not just the page containing oldestXact, - * available to this callback. The benefit would be rare and small, so we - * don't optimize that edge case. + * With 64xid this function is just "<", but we left it as a function in order + * for its calls remain "vanilla" like. */ static bool CLOGPagePrecedes(int64 page1, int64 page2) diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index f5713dd7786..f08905d6a70 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -919,25 +919,6 @@ AdvanceOldestCommitTsXid(TransactionId oldestXact) /* * Decide whether a commitTS page number is "older" for truncation purposes. * Analogous to CLOGPagePrecedes(). - * - * At default BLCKSZ, (1 << 31) % COMMIT_TS_XACTS_PER_PAGE == 128. This - * introduces differences compared to CLOG and the other SLRUs having (1 << - * 31) % per_page == 0. This function never tests exactly - * TransactionIdPrecedes(x-2^31, x). When the system reaches xidStopLimit, - * there are two possible counts of page boundaries between oldestXact and the - * latest XID assigned, depending on whether oldestXact is within the first - * 128 entries of its page. Since this function doesn't know the location of - * oldestXact within page2, it returns false for one page that actually is - * expendable. 
This is a wider (yet still negligible) version of the - * truncation opportunity that CLOGPagePrecedes() cannot recognize. - * - * For the sake of a worked example, number entries with decimal values such - * that page1==1 entries range from 1.0 to 1.999. Let N+0.15 be the number of - * pages that 2^31 entries will span (N is an integer). If oldestXact=N+2.1, - * then the final safe XID assignment leaves newestXact=1.95. We keep page 2, - * because entry=2.85 is the border that toggles whether entries precede the - * last entry of the oldestXact page. While page 2 is expendable at - * oldestXact=N+2.1, it would be precious at oldestXact=N+2.9. */ static bool CommitTsPagePrecedes(int64 page1, int64 page2) diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 15ba6effc43..b4cdc875f87 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -112,15 +112,15 @@ ((xid) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE) #define MultiXactIdToOffsetEntry(xid) \ ((xid) % (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE) -#define MultiXactIdToOffsetSegment(xid) (MultiXactIdToOffsetPage(xid) / SLRU_PAGES_PER_SEGMENT) +#define MultiXactIdToOffsetSegment(xid) ((uint64)(MultiXactIdToOffsetPage(xid) / SLRU_PAGES_PER_SEGMENT)) /* * The situation for members is a bit more complex: we store one byte of * additional flag bits for each TransactionId. To do this without getting - * into alignment issues, we store four bytes of flags, and then the - * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and - * are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409 groups - * per page. This wastes 12 bytes per page, but that's OK -- simplicity (and + * into alignment issues, we store eight bytes of flags, and then the + * corresponding 8 Xids. Each such 9-word (72-byte) set we call a "group", and + * are stored as a whole in pages. 
Thus, with 8kB BLCKSZ, we keep 113 groups + * per page. This wastes 56 bytes per page, but that's OK -- simplicity (and * performance) trumps space efficiency here. * * Note that the "offset" macros work with byte offset, not array indexes, so @@ -132,7 +132,7 @@ #define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) /* how many full bytes of flags are there in a group? */ -#define MULTIXACT_FLAGBYTES_PER_GROUP 4 +#define MULTIXACT_FLAGBYTES_PER_GROUP 8 #define MULTIXACT_MEMBERS_PER_MEMBERGROUP \ (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE) /* size in bytes of a complete group */ @@ -142,22 +142,9 @@ #define MULTIXACT_MEMBERS_PER_PAGE \ (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP) -/* - * Because the number of items per page is not a divisor of the last item - * number (member 0xFFFFFFFF), the last segment does not use the maximum number - * of pages, and moreover the last used page therein does not use the same - * number of items as previous pages. (Another way to say it is that the - * 0xFFFFFFFF member is somewhere in the middle of the last page, so the page - * has some empty space after that item.) - * - * This constant is the number of members in the last page of the last segment. 
- */ -#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \ - ((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1)) - /* page in which a member is to be found */ #define MXOffsetToMemberPage(xid) ((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE) -#define MXOffsetToMemberSegment(xid) (MXOffsetToMemberPage(xid) / SLRU_PAGES_PER_SEGMENT) +#define MXOffsetToMemberSegment(xid) ((uint64)(MXOffsetToMemberPage(xid) / SLRU_PAGES_PER_SEGMENT)) /* Location (byte offset within page) of flag word for a given member */ #define MXOffsetToFlagsOffset(xid) \ @@ -216,22 +203,8 @@ typedef struct MultiXactStateData MultiXactId oldestMultiXactId; Oid oldestMultiXactDB; - /* - * Oldest multixact offset that is potentially referenced by a multixact - * referenced by a relation. We don't always know this value, so there's - * a flag here to indicate whether or not we currently do. - */ - MultiXactOffset oldestOffset; - bool oldestOffsetKnown; - /* support for anti-wraparound measures */ MultiXactId multiVacLimit; - MultiXactId multiWarnLimit; - MultiXactId multiStopLimit; - MultiXactId multiWrapLimit; - - /* support for members anti-wraparound measures */ - MultiXactOffset offsetStopLimit; /* known if oldestOffsetKnown */ /* * Per-backend data starts here. 
We have two arrays stored in the area @@ -361,9 +334,6 @@ static bool MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2); static void ExtendMultiXactOffset(MultiXactId multi); static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers); -static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary, - MultiXactOffset start, uint32 distance); -static bool SetOffsetVacuumLimit(bool is_startup); static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result); static void WriteMZeroPageXlogRec(int64 pageno, uint8 info); static void WriteMTruncateXlogRec(Oid oldestMultiDB, @@ -397,6 +367,9 @@ MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1, /* MultiXactIdSetOldestMember() must have been called already. */ Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId])); + /* memset members array because with 64-bit xids it has a padding hole */ + MemSet(members, 0, sizeof(members)); + /* * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs * are still running. In typical usage, xid2 will be our own XID and the @@ -512,7 +485,7 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status) * end of the loop. 
*/ newMembers = (MultiXactMember *) - palloc(sizeof(MultiXactMember) * (nmembers + 1)); + palloc0(sizeof(MultiXactMember) * (nmembers + 1)); for (i = 0, j = 0; i < nmembers; i++) { @@ -527,7 +500,6 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status) newMembers[j].xid = xid; newMembers[j++].status = status; - newMulti = MultiXactIdCreateFromMembers(j, newMembers); pfree(members); @@ -903,8 +875,8 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, for (i = 0; i < nmembers; i++, offset++) { TransactionId *memberptr; - uint32 *flagsptr; - uint32 flagsval; + uint64 *flagsptr; + uint64 flagsval; int bshift; int flagsoff; int memberoff; @@ -927,12 +899,12 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, *memberptr = members[i].xid; - flagsptr = (uint32 *) + flagsptr = (uint64 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); flagsval = *flagsptr; - flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift); - flagsval |= (members[i].status << bshift); + flagsval &= ~((uint64) ((1ULL << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift); + flagsval |= ((uint64) members[i].status << bshift); *flagsptr = flagsval; MultiXactMemberCtl->shared->page_dirty[slotno] = true; @@ -985,8 +957,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) * If we're past multiVacLimit or the safe threshold for member storage * space, or we don't know what the safe threshold for member storage is, * start trying to force autovacuum cycles. - * If we're past multiWarnLimit, start issuing warnings. - * If we're past multiStopLimit, refuse to create new MultiXactIds. * * Note these are pretty much the same protections in GetNewTransactionId. *---------- @@ -1000,41 +970,9 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) * possibility of deadlock while doing get_database_name(). First, * copy all the shared values we'll need in this path. 
*/ - MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit; - MultiXactId multiStopLimit = MultiXactState->multiStopLimit; - MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit; - Oid oldest_datoid = MultiXactState->oldestMultiXactDB; LWLockRelease(MultiXactGenLock); - if (IsUnderPostmaster && - !MultiXactIdPrecedes(result, multiStopLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* - * Immediately kick autovacuum into action as we're already in - * ERROR territory. - */ - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands that generate new MultiXactIds to avoid wraparound data loss in database \"%s\"", - oldest_datname), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands that generate new MultiXactIds to avoid wraparound data loss in database with OID %u", - oldest_datoid), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } - /* * To avoid swamping the postmaster with signals, we issue the autovac * request only once per 64K multis generated. 
This still gives @@ -1043,31 +981,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) if (IsUnderPostmaster && (result % 65536) == 0) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - if (!MultiXactIdPrecedes(result, multiWarnLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(WARNING, - (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used", - "database \"%s\" must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - result, - oldest_datname, - multiWrapLimit - result), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(WARNING, - (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used", - "database with OID %u must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - result, - oldest_datoid, - multiWrapLimit - result), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } - /* Re-acquire lock and start over */ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); result = MultiXactState->nextMXact; @@ -1092,78 +1005,6 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) else *offset = nextOffset; - /*---------- - * Protect against overrun of the members space as well, with the - * following rules: - * - * If we're past offsetStopLimit, refuse to generate more multis. - * If we're close to offsetStopLimit, emit a warning. - * - * Arbitrarily, we start emitting warnings when we're 20 segments or less - * from offsetStopLimit. - * - * Note we haven't updated the shared state yet, so if we fail at this - * point, the multixact ID we grabbed can still be used by the next guy. 
- * - * Note that there is no point in forcing autovacuum runs here: the - * multixact freeze settings would have to be reduced for that to have any - * effect. - *---------- - */ -#define OFFSET_WARN_SEGMENTS 20 - if (MultiXactState->oldestOffsetKnown && - MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset, - nmembers)) - { - /* see comment in the corresponding offsets wraparound case */ - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("multixact \"members\" limit exceeded"), - errdetail_plural("This command would create a multixact with %u members, but the remaining space is only enough for %u member.", - "This command would create a multixact with %u members, but the remaining space is only enough for %u members.", - MultiXactState->offsetStopLimit - nextOffset - 1, - nmembers, - MultiXactState->offsetStopLimit - nextOffset - 1), - errhint("Execute a database-wide VACUUM in database with OID %u with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings.", - MultiXactState->oldestMultiXactDB))); - } - - /* - * Check whether we should kick autovacuum into action, to prevent members - * wraparound. NB we use a much larger window to trigger autovacuum than - * just the warning limit. The warning is just a measure of last resort - - * this is in line with GetNewTransactionId's behaviour. - */ - if (!MultiXactState->oldestOffsetKnown || - (MultiXactState->nextOffset - MultiXactState->oldestOffset - > MULTIXACT_MEMBER_SAFE_THRESHOLD)) - { - /* - * To avoid swamping the postmaster with signals, we issue the autovac - * request only when crossing a segment boundary. With default - * compilation settings that's roughly after 50k members. This still - * gives plenty of chances before we get into real trouble. 
- */ - if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) != - (MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT)) - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - } - - if (MultiXactState->oldestOffsetKnown && - MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, - nextOffset, - nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS)) - ereport(WARNING, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg_plural("database with OID %u must be vacuumed before %d more multixact member is used", - "database with OID %u must be vacuumed before %d more multixact members are used", - MultiXactState->offsetStopLimit - nextOffset + nmembers, - MultiXactState->oldestMultiXactDB, - MultiXactState->offsetStopLimit - nextOffset + nmembers), - errhint("Execute a database-wide VACUUM in that database with reduced vacuum_multixact_freeze_min_age and vacuum_multixact_freeze_table_age settings."))); - ExtendMultiXactMember(nextOffset, nmembers); /* @@ -1192,8 +1033,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) LWLockRelease(MultiXactGenLock); - debug_elog4(DEBUG2, "GetNew: returning %llu offset %u", - (unsigned long long) result, *offset); + debug_elog4(DEBUG2, "GetNew: returning %llu offset %llu", + (unsigned long long) result, (unsigned long long) *offset); return result; } @@ -1303,14 +1144,14 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, if (MultiXactIdPrecedes(multi, oldestMXact)) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("MultiXactId %u does no longer exist -- apparent wraparound", - multi))); + errmsg("MultiXactId %llu does no longer exist -- apparent wraparound", + (unsigned long long) multi))); if (!MultiXactIdPrecedes(multi, nextMXact)) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), - errmsg("MultiXactId %u has not been created yet -- apparent wraparound", - multi))); + errmsg("MultiXactId %llu has not been created yet -- apparent 
wraparound", + (unsigned long long) multi))); /* * Find out the offset at which we need to start reading MultiXactMembers @@ -1356,7 +1197,10 @@ retry: offptr += entryno; offset = *offptr; - Assert(offset != 0); + if (offset == 0) + ereport(ERROR, + (errmsg("found invalid zero offset in multixact %llu", + (unsigned long long) multi))); /* * Use the same increment rule as GetNewMultiXactId(), that is, don't @@ -1403,7 +1247,7 @@ retry: LWLockRelease(MultiXactOffsetSLRULock); - ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember)); + ptr = (MultiXactMember *) palloc0(length * sizeof(MultiXactMember)); /* Now get the members themselves. */ LWLockAcquire(MultiXactMemberSLRULock, LW_EXCLUSIVE); @@ -1413,7 +1257,7 @@ retry: for (i = 0; i < length; i++, offset++) { TransactionId *xactptr; - uint32 *flagsptr; + uint64 *flagsptr; int flagsoff; int bshift; int memberoff; @@ -1439,7 +1283,7 @@ retry: flagsoff = MXOffsetToFlagsOffset(offset); bshift = MXOffsetToFlagsBitShift(offset); - flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); + flagsptr = (uint64 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); ptr[truelength].xid = *xactptr; ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK; @@ -1904,7 +1748,7 @@ void BootStrapMultiXact(void) { int slotno; - int pageno; + int64 pageno; LWLockAcquire(MultiXactOffsetSLRULock, LW_EXCLUSIVE); @@ -2225,8 +2069,9 @@ void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset) { - debug_elog4(DEBUG2, "MultiXact: setting next multi to %llu offset %u", - (unsigned long long) nextMulti, nextMultiOffset); + debug_elog4(DEBUG2, "MultiXact: setting next multi to %llu offset %llu", + (unsigned long long) nextMulti, + (unsigned long long) nextMultiOffset); LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); MultiXactState->nextMXact = nextMulti; MultiXactState->nextOffset = nextMultiOffset; @@ -2260,47 +2105,9 @@ SetMultiXactIdLimit(MultiXactId 
oldest_datminmxid, Oid oldest_datoid, bool is_startup) { MultiXactId multiVacLimit; - MultiXactId multiWarnLimit; - MultiXactId multiStopLimit; - MultiXactId multiWrapLimit; - MultiXactId curMulti; - bool needs_offset_vacuum; Assert(MultiXactIdIsValid(oldest_datminmxid)); - /* - * We pretend that a wrap will happen halfway through the multixact ID - * space, but that's not really true, because multixacts wrap differently - * from transaction IDs. Note that, separately from any concern about - * multixact IDs wrapping, we must ensure that multixact members do not - * wrap. Limits for that are set in SetOffsetVacuumLimit, not here. - */ - multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1); - if (multiWrapLimit < FirstMultiXactId) - multiWrapLimit += FirstMultiXactId; - - /* - * We'll refuse to continue assigning MultiXactIds once we get within 3M - * multi of data loss. See SetTransactionIdLimit. - */ - multiStopLimit = multiWrapLimit - 3000000; - if (multiStopLimit < FirstMultiXactId) - multiStopLimit -= FirstMultiXactId; - - /* - * We'll start complaining loudly when we get within 40M multis of data - * loss. This is kind of arbitrary, but if you let your gas gauge get - * down to 2% of full, would you be looking for the next gas station? We - * need to be fairly liberal about this number because there are lots of - * scenarios where most transactions are done by automatic clients that - * won't pay attention to warnings. (No, we're not gonna make this - * configurable. If you know enough to configure it, you know enough to - * not get in this kind of trouble in the first place.) - */ - multiWarnLimit = multiWrapLimit - 40000000; - if (multiWarnLimit < FirstMultiXactId) - multiWarnLimit -= FirstMultiXactId; - /* * We'll start trying to force autovacuums when oldest_datminmxid gets to * be more than autovacuum_multixact_freeze_max_age mxids old. @@ -2310,25 +2117,14 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, * its value. 
See SetTransactionIdLimit. */ multiVacLimit = oldest_datminmxid + autovacuum_multixact_freeze_max_age; - if (multiVacLimit < FirstMultiXactId) - multiVacLimit += FirstMultiXactId; /* Grab lock for just long enough to set the new limit values */ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); MultiXactState->oldestMultiXactId = oldest_datminmxid; MultiXactState->oldestMultiXactDB = oldest_datoid; MultiXactState->multiVacLimit = multiVacLimit; - MultiXactState->multiWarnLimit = multiWarnLimit; - MultiXactState->multiStopLimit = multiStopLimit; - MultiXactState->multiWrapLimit = multiWrapLimit; - curMulti = MultiXactState->nextMXact; LWLockRelease(MultiXactGenLock); - /* Log the info */ - ereport(DEBUG1, - (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u", - multiWrapLimit, oldest_datoid))); - /* * Computing the actual limits is only possible once the data directory is * in a consistent state. There's no need to compute the limits while @@ -2340,59 +2136,6 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, return; Assert(!InRecovery); - - /* Set limits for offset vacuum. */ - needs_offset_vacuum = SetOffsetVacuumLimit(is_startup); - - /* - * If past the autovacuum force point, immediately signal an autovac - * request. The reason for this is that autovac only processes one - * database per invocation. Once it's finished cleaning up the oldest - * database, it'll call here, and we'll signal the postmaster to start - * another iteration immediately if there are still any old databases. - */ - if ((MultiXactIdPrecedes(multiVacLimit, curMulti) || - needs_offset_vacuum) && IsUnderPostmaster) - SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - /* Give an immediate warning if past the wrap warn point */ - if (MultiXactIdPrecedes(multiWarnLimit, curMulti)) - { - char *oldest_datname; - - /* - * We can be called when not inside a transaction, for example during - * StartupXLOG(). 
In such a case we cannot do database access, so we - * must just report the oldest DB's OID. - * - * Note: it's also possible that get_database_name fails and returns - * NULL, for example because the database just got dropped. We'll - * still warn, even though the warning might now be unnecessary. - */ - if (IsTransactionState()) - oldest_datname = get_database_name(oldest_datoid); - else - oldest_datname = NULL; - - if (oldest_datname) - ereport(WARNING, - (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used", - "database \"%s\" must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - curMulti, - oldest_datname, - multiWrapLimit - curMulti), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(WARNING, - (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used", - "database with OID %u must be vacuumed before %u more MultiXactIds are used", - multiWrapLimit - curMulti, - oldest_datoid, - multiWrapLimit - curMulti), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } } /* @@ -2417,8 +2160,8 @@ MultiXactAdvanceNextMXact(MultiXactId minMulti, } if (MultiXactOffsetPrecedes(MultiXactState->nextOffset, minMultiOffset)) { - debug_elog3(DEBUG2, "MultiXact: setting next offset to %u", - minMultiOffset); + debug_elog3(DEBUG2, "MultiXact: setting next offset to %llu", + (unsigned long long) minMultiOffset); MultiXactState->nextOffset = minMultiOffset; } LWLockRelease(MultiXactGenLock); @@ -2490,7 +2233,7 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers) { int flagsoff; int flagsbit; - uint32 difference; + uint64 difference; /* * Only zero when at first entry of a 
page. @@ -2511,23 +2254,7 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers) LWLockRelease(MultiXactMemberSLRULock); } - /* - * Compute the number of items till end of current page. Careful: if - * addition of unsigned ints wraps around, we're at the last page of - * the last segment; since that page holds a different number of items - * than other pages, we need to do it differently. - */ - if (offset + MAX_MEMBERS_IN_LAST_MEMBERS_PAGE < offset) - { - /* - * This is the last page of the last segment; we can compute the - * number of items left to allocate in it without modulo - * arithmetic. - */ - difference = MaxMultiXactOffset - offset + 1; - } - else - difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE; + difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE; /* * Advance to next page, taking care to properly handle the wraparound @@ -2591,184 +2318,6 @@ GetOldestMultiXactId(void) return oldestMXact; } -/* - * Determine how aggressively we need to vacuum in order to prevent member - * wraparound. - * - * To do so determine what's the oldest member offset and install the limit - * info in MultiXactState, where it can be used to prevent overrun of old data - * in the members SLRU area. - * - * The return value is true if emergency autovacuum is required and false - * otherwise. - */ -static bool -SetOffsetVacuumLimit(bool is_startup) -{ - MultiXactId oldestMultiXactId; - MultiXactId nextMXact; - MultiXactOffset oldestOffset = 0; /* placate compiler */ - MultiXactOffset prevOldestOffset; - MultiXactOffset nextOffset; - bool oldestOffsetKnown = false; - bool prevOldestOffsetKnown; - MultiXactOffset offsetStopLimit = 0; - MultiXactOffset prevOffsetStopLimit; - - /* - * NB: Have to prevent concurrent truncation, we might otherwise try to - * lookup an oldestMulti that's concurrently getting truncated away. 
- */ - LWLockAcquire(MultiXactTruncationLock, LW_SHARED); - - /* Read relevant fields from shared memory. */ - LWLockAcquire(MultiXactGenLock, LW_SHARED); - oldestMultiXactId = MultiXactState->oldestMultiXactId; - nextMXact = MultiXactState->nextMXact; - nextOffset = MultiXactState->nextOffset; - prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown; - prevOldestOffset = MultiXactState->oldestOffset; - prevOffsetStopLimit = MultiXactState->offsetStopLimit; - Assert(MultiXactState->finishedStartup); - LWLockRelease(MultiXactGenLock); - - /* - * Determine the offset of the oldest multixact. Normally, we can read - * the offset from the multixact itself, but there's an important special - * case: if there are no multixacts in existence at all, oldestMXact - * obviously can't point to one. It will instead point to the multixact - * ID that will be assigned the next time one is needed. - */ - if (oldestMultiXactId == nextMXact) - { - /* - * When the next multixact gets created, it will be stored at the next - * offset. - */ - oldestOffset = nextOffset; - oldestOffsetKnown = true; - } - else - { - /* - * Figure out where the oldest existing multixact's offsets are - * stored. Due to bugs in early release of PostgreSQL 9.3.X and 9.4.X, - * the supposedly-earliest multixact might not really exist. We are - * careful not to fail in that case. - */ - oldestOffsetKnown = - find_multixact_start(oldestMultiXactId, &oldestOffset); - - if (oldestOffsetKnown) - ereport(DEBUG1, - (errmsg_internal("oldest MultiXactId member is at offset %llu", - (unsigned long long) oldestOffset))); - else - ereport(LOG, - (errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %llu does not exist on disk", - (unsigned long long) oldestMultiXactId))); - } - - LWLockRelease(MultiXactTruncationLock); - - /* - * If we can, compute limits (and install them MultiXactState) to prevent - * overrun of old data in the members SLRU area. 
We can only do so if the - * oldest offset is known though. - */ - if (oldestOffsetKnown) - { - /* move back to start of the corresponding segment */ - offsetStopLimit = oldestOffset - (oldestOffset % - (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT)); - - /* always leave one segment before the wraparound point */ - offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT); - - if (!prevOldestOffsetKnown && !is_startup) - ereport(LOG, - (errmsg("MultiXact member wraparound protections are now enabled"))); - - ereport(DEBUG1, - (errmsg_internal("MultiXact member stop limit is now %llu based on MultiXact %llu", - (unsigned long long) offsetStopLimit, - (unsigned long long) oldestMultiXactId))); - } - else if (prevOldestOffsetKnown) - { - /* - * If we failed to get the oldest offset this time, but we have a - * value from a previous pass through this function, use the old - * values rather than automatically forcing an emergency autovacuum - * cycle again. - */ - oldestOffset = prevOldestOffset; - oldestOffsetKnown = true; - offsetStopLimit = prevOffsetStopLimit; - } - - /* Install the computed values */ - LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); - MultiXactState->oldestOffset = oldestOffset; - MultiXactState->oldestOffsetKnown = oldestOffsetKnown; - MultiXactState->offsetStopLimit = offsetStopLimit; - LWLockRelease(MultiXactGenLock); - - /* - * Do we need an emergency autovacuum? If we're not sure, assume yes. - */ - return !oldestOffsetKnown || - (nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD); -} - -/* - * Return whether adding "distance" to "start" would move past "boundary". - * - * We use this to determine whether the addition is "wrapping around" the - * boundary point, hence the name. The reason we don't want to use the regular - * 2^31-modulo arithmetic here is that we want to be able to use the whole of - * the 2^32-1 space here, allowing for more multixacts than would fit - * otherwise. 
- */ -static bool -MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start, - uint32 distance) -{ - MultiXactOffset finish; - - /* - * Note that offset number 0 is not used (see GetMultiXactIdMembers), so - * if the addition wraps around the UINT_MAX boundary, skip that value. - */ - finish = start + distance; - if (finish < start) - finish++; - - /*----------------------------------------------------------------------- - * When the boundary is numerically greater than the starting point, any - * value numerically between the two is not wrapped: - * - * <----S----B----> - * [---) = F wrapped past B (and UINT_MAX) - * [---) = F not wrapped - * [----] = F wrapped past B - * - * When the boundary is numerically less than the starting point (i.e. the - * UINT_MAX wraparound occurs somewhere in between) then all values in - * between are wrapped: - * - * <----B----S----> - * [---) = F not wrapped past B (but wrapped past UINT_MAX) - * [---) = F wrapped past B (and UINT_MAX) - * [----] = F not wrapped - *----------------------------------------------------------------------- - */ - if (start < boundary) - return finish >= boundary || finish < start; - else - return finish >= boundary && finish < start; -} - /* * Find the starting offset of the given MultiXactId. * @@ -2812,97 +2361,6 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result) return true; } -/* - * Determine how many multixacts, and how many multixact members, currently - * exist. Return false if unable to determine. 
- */ -static bool -ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members) -{ - MultiXactOffset nextOffset; - MultiXactOffset oldestOffset; - MultiXactId oldestMultiXactId; - MultiXactId nextMultiXactId; - bool oldestOffsetKnown; - - LWLockAcquire(MultiXactGenLock, LW_SHARED); - nextOffset = MultiXactState->nextOffset; - oldestMultiXactId = MultiXactState->oldestMultiXactId; - nextMultiXactId = MultiXactState->nextMXact; - oldestOffset = MultiXactState->oldestOffset; - oldestOffsetKnown = MultiXactState->oldestOffsetKnown; - LWLockRelease(MultiXactGenLock); - - if (!oldestOffsetKnown) - return false; - - *members = nextOffset - oldestOffset; - *multixacts = nextMultiXactId - oldestMultiXactId; - return true; -} - -/* - * Multixact members can be removed once the multixacts that refer to them - * are older than every datminmxid. autovacuum_multixact_freeze_max_age and - * vacuum_multixact_freeze_table_age work together to make sure we never have - * too many multixacts; we hope that, at least under normal circumstances, - * this will also be sufficient to keep us from using too many offsets. - * However, if the average multixact has many members, we might exhaust the - * members space while still using few enough members that these limits fail - * to trigger full table scans for relminmxid advancement. At that point, - * we'd have no choice but to start failing multixact-creating operations - * with an error. - * - * To prevent that, if more than a threshold portion of the members space is - * used, we effectively reduce autovacuum_multixact_freeze_max_age and - * to a value just less than the number of multixacts in use. We hope that - * this will quickly trigger autovacuuming on the table or tables with the - * oldest relminmxid, thus allowing datminmxid values to advance and removing - * some members. - * - * As the fraction of the member space currently in use grows, we become - * more aggressive in clamping this value. 
That not only causes autovacuum - * to ramp up, but also makes any manual vacuums the user issues more - * aggressive. This happens because vacuum_set_xid_limits() clamps the - * freeze table and the minimum freeze age based on the effective - * autovacuum_multixact_freeze_max_age this function returns. In the worst - * case, we'll claim the freeze_max_age to zero, and every vacuum of any - * table will try to freeze every multixact. - * - * It's possible that these thresholds should be user-tunable, but for now - * we keep it simple. - */ -int -MultiXactMemberFreezeThreshold(void) -{ - MultiXactOffset members; - uint32 multixacts; - uint32 victim_multixacts; - double fraction; - - /* If we can't determine member space utilization, assume the worst. */ - if (!ReadMultiXactCounts(&multixacts, &members)) - return 0; - - /* If member space utilization is low, no special action is required. */ - if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD) - return autovacuum_multixact_freeze_max_age; - - /* - * Compute a target for relminmxid advancement. The number of multixacts - * we try to eliminate from the system is based on how far we are past - * MULTIXACT_MEMBER_SAFE_THRESHOLD. - */ - fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) / - (MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD); - victim_multixacts = multixacts * fraction; - - /* fraction could be > 1.0, but lowest possible freeze age is zero */ - if (victim_multixacts > multixacts) - return 0; - return multixacts - victim_multixacts; -} - typedef struct mxtruncinfo { int64 earliestExistingPage; @@ -2929,35 +2387,12 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data /* * Delete members segments [oldest, newOldest) - * - * The members SLRU can, in contrast to the offsets one, be filled to almost - * the full range at once. 
This means SimpleLruTruncate() can't trivially be - * used - instead the to-be-deleted range is computed using the offsets - * SLRU. C.f. TruncateMultiXact(). */ static void PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset) { - const int maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset); - int startsegment = MXOffsetToMemberSegment(oldestOffset); - int endsegment = MXOffsetToMemberSegment(newOldestOffset); - int segment = startsegment; - - /* - * Delete all the segments but the last one. The last segment can still - * contain, possibly partially, valid data. - */ - while (segment != endsegment) - { - elog(DEBUG2, "truncating multixact members segment %x", segment); - SlruDeleteSegment(MultiXactMemberCtl, segment); - - /* move to next segment, handling wraparound correctly */ - if (segment == maxsegment) - segment = 0; - else - segment += 1; - } + SimpleLruTruncate(MultiXactMemberCtl, + MXOffsetToMemberPage(newOldestOffset)); } /* @@ -3076,7 +2511,8 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB) { ereport(LOG, (errmsg("oldest MultiXact %llu not found, earliest MultiXact %llu, skipping truncation", - (unsigned long long) oldestMulti, (unsigned long long) earliest))); + (unsigned long long) oldestMulti, + (unsigned long long) earliest))); LWLockRelease(MultiXactTruncationLock); return; } @@ -3100,14 +2536,14 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB) } elog(DEBUG1, "performing multixact truncation: " - "offsets [%llu, %llu), offsets segments [%x, %x), " - "members [%u, %u), members segments [%x, %x)", + "offsets [%llu, %llu), offsets segments [%012llx, %012llx), " + "members [%lld, %lld), members segments [%012llx, %012llx)", (unsigned long long) oldestMulti, (unsigned long long) newOldestMulti, - MultiXactIdToOffsetSegment(oldestMulti), - MultiXactIdToOffsetSegment(newOldestMulti), - oldestOffset, newOldestOffset, - MXOffsetToMemberSegment(oldestOffset), - 
MXOffsetToMemberSegment(newOldestOffset)); + (unsigned long long) MultiXactIdToOffsetSegment(oldestMulti), + (unsigned long long) MultiXactIdToOffsetSegment(newOldestMulti), + (long long) oldestOffset, (long long) newOldestOffset, + (unsigned long long) MXOffsetToMemberSegment(oldestOffset), + (unsigned long long) MXOffsetToMemberSegment(newOldestOffset)); /* * Do truncation, and the WAL logging of the truncation, in a critical @@ -3181,7 +2617,7 @@ MultiXactOffsetPagePrecedes(int64 page1, int64 page2) /* * Decide whether a MultiXactMember page number is "older" for truncation - * purposes. There is no "invalid offset number" so use the numbers verbatim. + * purposes. There is no "invalid offset number" so use the numbers verbatim. */ static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2) @@ -3206,7 +2642,7 @@ MultiXactMemberPagePrecedes(int64 page1, int64 page2) bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2) { - int32 diff = (int32) (multi1 - multi2); + int64 diff = (int64) (multi1 - multi2); return (diff < 0); } @@ -3220,7 +2656,7 @@ MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2) bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2) { - int32 diff = (int32) (multi1 - multi2); + int64 diff = (int64) (multi1 - multi2); return (diff <= 0); } @@ -3232,7 +2668,7 @@ MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2) static bool MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2) { - int32 diff = (int32) (offset1 - offset2); + int64 diff = (int64) (offset1 - offset2); return (diff < 0); } @@ -3356,15 +2792,16 @@ multixact_redo(XLogReaderState *record) SizeOfMultiXactTruncate); elog(DEBUG1, "replaying multixact truncation: " - "offsets [%llu, %llu), offsets segments [%x, %x), " - "members [%u, %u), members segments [%x, %x)", + "offsets [%llu, %llu), offsets segments [%012llx, %012llx), " + "members [%llu, %llu), members segments [%012llx, %012llx)", (unsigned long long) 
xlrec.startTruncOff, (unsigned long long) xlrec.endTruncOff, - MultiXactIdToOffsetSegment(xlrec.startTruncOff), - MultiXactIdToOffsetSegment(xlrec.endTruncOff), - xlrec.startTruncMemb, xlrec.endTruncMemb, - MXOffsetToMemberSegment(xlrec.startTruncMemb), - MXOffsetToMemberSegment(xlrec.endTruncMemb)); + (unsigned long long) MultiXactIdToOffsetSegment(xlrec.startTruncOff), + (unsigned long long) MultiXactIdToOffsetSegment(xlrec.endTruncOff), + (unsigned long long) xlrec.startTruncMemb, + (unsigned long long) xlrec.endTruncMemb, + (unsigned long long) MXOffsetToMemberSegment(xlrec.startTruncMemb), + (unsigned long long) MXOffsetToMemberSegment(xlrec.endTruncMemb)); /* should not be required, but more than cheap enough */ LWLockAcquire(MultiXactTruncationLock, LW_EXCLUSIVE); @@ -3408,7 +2845,8 @@ pg_get_multixact_members(PG_FUNCTION_ARGS) if (mxid < FirstMultiXactId) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid MultiXactId: %llu", (unsigned long long) mxid))); + errmsg("invalid MultiXactId: %llu", + (unsigned long long) mxid))); if (SRF_IS_FIRSTCALL()) { diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 9e765c6c28a..c186e177ed1 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -1428,7 +1428,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) * must not assign. 
*/ lhs = per_page + offset; /* skip first page to avoid non-normal XIDs */ - rhs = lhs + (1U << 31); + rhs = lhs + (1ULL << 63); Assert(TransactionIdPrecedes(lhs, rhs)); Assert(TransactionIdPrecedes(rhs, lhs)); Assert(!TransactionIdPrecedes(lhs - 1, rhs)); @@ -1444,13 +1444,14 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page)); Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page)); Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page) - || (1U << 31) % per_page != 0); /* See CommitTsPagePrecedes() */ + || (1ULL << 63) % per_page != 0); /* See CommitTsPagePrecedes() */ Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page) - || (1U << 31) % per_page != 0); + || (1ULL << 63) % per_page != 0); Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page)); Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page)); Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page)); + /* * GetNewTransactionId() has assigned the last XID it can safely use, and * that XID is in the *LAST* page of the second segment. 
We must not @@ -1460,7 +1461,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) newestXact = newestPage * per_page + offset; Assert(newestXact / per_page == newestPage); oldestXact = newestXact + 1; - oldestXact -= 1U << 31; + oldestXact -= 1ULL << 63; oldestPage = oldestXact / per_page; Assert(!SlruMayDeleteSegment(ctl, (newestPage - @@ -1476,7 +1477,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) newestXact = newestPage * per_page + offset; Assert(newestXact / per_page == newestPage); oldestXact = newestXact + 1; - oldestXact -= 1U << 31; + oldestXact -= 1ULL << 63; oldestPage = oldestXact / per_page; Assert(!SlruMayDeleteSegment(ctl, (newestPage - @@ -1582,7 +1583,7 @@ SlruScanDirectory(SlruCtl ctl, SlruScanCallback callback, void *data) if ((len == 12 || len == 13 || len == 14) && strspn(clde->d_name, "0123456789ABCDEF") == len) { - segno = (int) strtol(clde->d_name, NULL, 16); + segno = (int) strtoi64(clde->d_name, NULL, 16); segpage = segno * SLRU_PAGES_PER_SEGMENT; elog(DEBUG2, "SlruScanDirectory invoking callback on %s/%s", diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index e2e20ed06c3..2d124d9600d 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -212,11 +212,14 @@ void BootStrapSUBTRANS(void) { int slotno; + int64 pageno; + + pageno = TransactionIdToPage(XidFromFullTransactionId(ShmemVariableCache->nextXid)); LWLockAcquire(SubtransSLRULock, LW_EXCLUSIVE); /* Create and zero the first page of the subtrans log */ - slotno = ZeroSUBTRANSPage(0); + slotno = ZeroSUBTRANSPage(pageno); /* Make sure it's written out */ SimpleLruWritePage(SubTransCtl, slotno); @@ -269,9 +272,6 @@ StartupSUBTRANS(TransactionId oldestActiveXID) { (void) ZeroSUBTRANSPage(startPage); startPage++; - /* must account for wraparound */ - if (startPage > TransactionIdToPage(MaxTransactionId)) - startPage = 0; } (void) ZeroSUBTRANSPage(startPage); @@ 
-348,6 +348,7 @@ TruncateSUBTRANS(TransactionId oldestXact) * a page and oldestXact == next XID. In that case, if we didn't subtract * one, we'd trigger SimpleLruTruncate's wraparound detection. */ + TransactionIdRetreat(oldestXact); cutoffPage = TransactionIdToPage(oldestXact); diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c index b243878094f..7b8beaae05b 100644 --- a/src/backend/access/transam/transam.c +++ b/src/backend/access/transam/transam.c @@ -301,14 +301,14 @@ TransactionIdPrecedes(TransactionId id1, TransactionId id2) { /* * If either ID is a permanent XID then we can just do unsigned - * comparison. If both are normal, do a modulo-2^32 comparison. + * comparison. If both are normal, do a modulo-2^64 comparison. */ - int32 diff; + int64 diff; if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) return (id1 < id2); - diff = (int32) (id1 - id2); + diff = (int64) (id1 - id2); return (diff < 0); } @@ -318,12 +318,12 @@ TransactionIdPrecedes(TransactionId id1, TransactionId id2) bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2) { - int32 diff; + int64 diff; if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) return (id1 <= id2); - diff = (int32) (id1 - id2); + diff = (int64) (id1 - id2); return (diff <= 0); } @@ -333,12 +333,12 @@ TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2) bool TransactionIdFollows(TransactionId id1, TransactionId id2) { - int32 diff; + int64 diff; if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) return (id1 > id2); - diff = (int32) (id1 - id2); + diff = (int64) (id1 - id2); return (diff > 0); } @@ -348,12 +348,12 @@ TransactionIdFollows(TransactionId id1, TransactionId id2) bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2) { - int32 diff; + int64 diff; if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) return (id1 >= id2); - diff = (int32) (id1 - id2); + diff = (int64) (id1 - id2); 
return (diff >= 0); } diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index efc19822a46..019867fe919 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -945,7 +945,7 @@ TwoPhaseGetDummyProc(TransactionId xid, bool lock_held) /************************************************************************/ #define TwoPhaseFilePath(path, xid) \ - snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X", xid) + snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%016llX", (unsigned long long) xid) /* * 2PC state file format: @@ -1884,13 +1884,13 @@ restoreTwoPhaseData(void) cldir = AllocateDir(TWOPHASE_DIR); while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL) { - if (strlen(clde->d_name) == 8 && - strspn(clde->d_name, "0123456789ABCDEF") == 8) + if (strlen(clde->d_name) == 16 && + strspn(clde->d_name, "0123456789ABCDEF") == 16) { TransactionId xid; char *buf; - xid = (TransactionId) strtoul(clde->d_name, NULL, 16); + xid = (TransactionId) strtou64(clde->d_name, NULL, 16); buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr, true, false, false); @@ -2222,7 +2222,6 @@ ProcessTwoPhaseBuffer(TransactionId xid, if (fromdisk) { - /* Read and validate file */ buf = ReadTwoPhaseFile(xid, false); } else diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 748120a0125..0a89721b004 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -68,7 +68,7 @@ GetNewTransactionId(bool isSubXact) Assert(!isSubXact); MyProc->xid = BootstrapTransactionId; ProcGlobal->xids[MyProc->pgxactoff] = BootstrapTransactionId; - return FullTransactionIdFromEpochAndXid(0, BootstrapTransactionId); + return FullTransactionIdFromXid(BootstrapTransactionId); } /* safety check, we should never get this far in a HS standby */ @@ -102,11 +102,6 @@ GetNewTransactionId(bool isSubXact) * possibility of deadlock while doing get_database_name(). 
First, * copy all the shared values we'll need in this path. */ - TransactionId xidWarnLimit = ShmemVariableCache->xidWarnLimit; - TransactionId xidStopLimit = ShmemVariableCache->xidStopLimit; - TransactionId xidWrapLimit = ShmemVariableCache->xidWrapLimit; - Oid oldest_datoid = ShmemVariableCache->oldestXidDB; - LWLockRelease(XidGenLock); /* @@ -117,48 +112,6 @@ GetNewTransactionId(bool isSubXact) if (IsUnderPostmaster && (xid % 65536) == 0) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - if (IsUnderPostmaster && - TransactionIdFollowsOrEquals(xid, xidStopLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands to avoid wraparound data loss in database \"%s\"", - oldest_datname), - errhint("Stop the postmaster and vacuum that database in single-user mode.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands to avoid wraparound data loss in database with OID %u", - oldest_datoid), - errhint("Stop the postmaster and vacuum that database in single-user mode.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } - else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(WARNING, - (errmsg("database \"%s\" must be vacuumed within %u transactions", - oldest_datname, - xidWrapLimit - xid), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - 
else - ereport(WARNING, - (errmsg("database with OID %u must be vacuumed within %u transactions", - oldest_datoid, - xidWrapLimit - xid), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } - /* Re-acquire lock and start over */ LWLockAcquire(XidGenLock, LW_EXCLUSIVE); full_xid = ShmemVariableCache->nextXid; @@ -270,7 +223,7 @@ ReadNextFullTransactionId(void) } /* - * Advance nextXid to the value after a given xid. The epoch is inferred. + * Advance nextXid to the value after a given xid. * This must only be called during recovery or from two-phase start-up code. */ void @@ -278,7 +231,6 @@ AdvanceNextFullTransactionIdPastXid(TransactionId xid) { FullTransactionId newNextFullXid; TransactionId next_xid; - uint32 epoch; /* * It is safe to read nextXid without a lock, because this is only called @@ -292,19 +244,9 @@ AdvanceNextFullTransactionIdPastXid(TransactionId xid) if (!TransactionIdFollowsOrEquals(xid, next_xid)) return; - /* - * Compute the FullTransactionId that comes after the given xid. To do - * this, we preserve the existing epoch, but detect when we've wrapped - * into a new epoch. This is necessary because WAL records and 2PC state - * currently contain 32 bit xids. The wrap logic is safe in those cases - * because the span of active xids cannot exceed one epoch at any given - * point in the WAL stream. - */ + /* Compute the FullTransactionId that comes after the given xid. 
*/ TransactionIdAdvance(xid); - epoch = EpochFromFullTransactionId(ShmemVariableCache->nextXid); - if (unlikely(xid < next_xid)) - ++epoch; - newNextFullXid = FullTransactionIdFromEpochAndXid(epoch, xid); + newNextFullXid = FullTransactionIdFromXid(xid); /* * We still need to take a lock to modify the value when there are @@ -345,54 +287,10 @@ void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) { TransactionId xidVacLimit; - TransactionId xidWarnLimit; - TransactionId xidStopLimit; - TransactionId xidWrapLimit; TransactionId curXid; Assert(TransactionIdIsNormal(oldest_datfrozenxid)); - /* - * The place where we actually get into deep trouble is halfway around - * from the oldest potentially-existing XID. (This calculation is - * probably off by one or two counts, because the special XIDs reduce the - * size of the loop a little bit. But we throw in plenty of slop below, - * so it doesn't matter.) - */ - xidWrapLimit = oldest_datfrozenxid + (MaxTransactionId >> 1); - if (xidWrapLimit < FirstNormalTransactionId) - xidWrapLimit += FirstNormalTransactionId; - - /* - * We'll refuse to continue assigning XIDs in interactive mode once we get - * within 3M transactions of data loss. This leaves lots of room for the - * DBA to fool around fixing things in a standalone backend, while not - * being significant compared to total XID space. (VACUUM requires an XID - * if it truncates at wal_level!=minimal. "VACUUM (ANALYZE)", which a DBA - * might do by reflex, assigns an XID. Hence, we had better be sure - * there's lots of XIDs left...) Also, at default BLCKSZ, this leaves two - * completely-idle segments. In the event of edge-case bugs involving - * page or segment arithmetic, idle segments render the bugs unreachable - * outside of single-user mode. 
- */ - xidStopLimit = xidWrapLimit - 3000000; - if (xidStopLimit < FirstNormalTransactionId) - xidStopLimit -= FirstNormalTransactionId; - - /* - * We'll start complaining loudly when we get within 40M transactions of - * data loss. This is kind of arbitrary, but if you let your gas gauge - * get down to 2% of full, would you be looking for the next gas station? - * We need to be fairly liberal about this number because there are lots - * of scenarios where most transactions are done by automatic clients that - * won't pay attention to warnings. (No, we're not gonna make this - * configurable. If you know enough to configure it, you know enough to - * not get in this kind of trouble in the first place.) - */ - xidWarnLimit = xidWrapLimit - 40000000; - if (xidWarnLimit < FirstNormalTransactionId) - xidWarnLimit -= FirstNormalTransactionId; - /* * We'll start trying to force autovacuums when oldest_datfrozenxid gets * to be more than autovacuum_freeze_max_age transactions old. @@ -416,18 +314,10 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) LWLockAcquire(XidGenLock, LW_EXCLUSIVE); ShmemVariableCache->oldestXid = oldest_datfrozenxid; ShmemVariableCache->xidVacLimit = xidVacLimit; - ShmemVariableCache->xidWarnLimit = xidWarnLimit; - ShmemVariableCache->xidStopLimit = xidStopLimit; - ShmemVariableCache->xidWrapLimit = xidWrapLimit; ShmemVariableCache->oldestXidDB = oldest_datoid; curXid = XidFromFullTransactionId(ShmemVariableCache->nextXid); LWLockRelease(XidGenLock); - /* Log the info */ - ereport(DEBUG1, - (errmsg_internal("transaction ID wrap limit is %u, limited by database with OID %u", - xidWrapLimit, oldest_datoid))); - /* * If past the autovacuum force point, immediately signal an autovac * request. 
The reason for this is that autovac only processes one @@ -438,41 +328,6 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) if (TransactionIdFollowsOrEquals(curXid, xidVacLimit) && IsUnderPostmaster && !InRecovery) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - /* Give an immediate warning if past the wrap warn point */ - if (TransactionIdFollowsOrEquals(curXid, xidWarnLimit) && !InRecovery) - { - char *oldest_datname; - - /* - * We can be called when not inside a transaction, for example during - * StartupXLOG(). In such a case we cannot do database access, so we - * must just report the oldest DB's OID. - * - * Note: it's also possible that get_database_name fails and returns - * NULL, for example because the database just got dropped. We'll - * still warn, even though the warning might now be unnecessary. - */ - if (IsTransactionState()) - oldest_datname = get_database_name(oldest_datoid); - else - oldest_datname = NULL; - - if (oldest_datname) - ereport(WARNING, - (errmsg("database \"%s\" must be vacuumed within %u transactions", - oldest_datname, - xidWrapLimit - curXid), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(WARNING, - (errmsg("database with OID %u must be vacuumed within %u transactions", - oldest_datoid, - xidWrapLimit - curXid), - errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } } diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 91bb70c1196..82736eaf6b9 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -5651,6 +5651,17 @@ XactLogCommitRecord(TimestampTz commit_time, xl_subxacts.nsubxacts = nsubxacts; } + 
if (TransactionIdIsValid(twophase_xid)) + { + xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; + xl_twophase.xid_lo = (uint32) (twophase_xid & 0xFFFFFFFF); + xl_twophase.xid_hi = (uint32) (twophase_xid >> 32); + Assert(twophase_gid != NULL); + + if (XLogLogicalInfoActive()) + xl_xinfo.xinfo |= XACT_XINFO_HAS_GID; + } + if (nrels > 0) { xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILENODES; @@ -5670,16 +5681,6 @@ XactLogCommitRecord(TimestampTz commit_time, xl_invals.nmsgs = nmsgs; } - if (TransactionIdIsValid(twophase_xid)) - { - xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; - xl_twophase.xid = twophase_xid; - Assert(twophase_gid != NULL); - - if (XLogLogicalInfoActive()) - xl_xinfo.xinfo |= XACT_XINFO_HAS_GID; - } - /* dump transaction origin information */ if (replorigin_session_origin != InvalidRepOriginId) { @@ -5800,6 +5801,17 @@ XactLogAbortRecord(TimestampTz abort_time, xl_subxacts.nsubxacts = nsubxacts; } + if (TransactionIdIsValid(twophase_xid)) + { + xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; + xl_twophase.xid_lo = (uint32) (twophase_xid & 0xFFFFFFFF); + xl_twophase.xid_hi = (uint32) (twophase_xid >> 32); + Assert(twophase_gid != NULL); + + if (XLogLogicalInfoActive()) + xl_xinfo.xinfo |= XACT_XINFO_HAS_GID; + } + if (nrels > 0) { xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILENODES; @@ -5816,7 +5828,8 @@ XactLogAbortRecord(TimestampTz abort_time, if (TransactionIdIsValid(twophase_xid)) { xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; - xl_twophase.xid = twophase_xid; + xl_twophase.xid_lo = (uint32) (twophase_xid & 0xFFFFFFFF); + xl_twophase.xid_hi = (uint32) (twophase_xid >> 32); Assert(twophase_gid != NULL); if (XLogLogicalInfoActive()) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 8c60e9ba140..a1faef39668 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4539,8 +4539,8 @@ BootStrapXLOG(void) checkPoint.PrevTimeLineID = BootstrapTimeLineID; checkPoint.fullPageWrites = fullPageWrites; 
checkPoint.nextXid = - FullTransactionIdFromEpochAndXid(0, Max(FirstNormalTransactionId, - start_xid)); + FullTransactionIdFromXid(Max(FirstNormalTransactionId, + start_xid)); checkPoint.nextOid = FirstGenbkiObjectId; checkPoint.nextMulti = Max(FirstMultiXactId, start_mxid); checkPoint.nextMultiOffset = start_mxoff; @@ -6617,7 +6617,7 @@ CreateCheckPoint(int flags) UpdateControlFile(); LWLockRelease(ControlFileLock); - /* Update shared-memory copy of checkpoint XID/epoch */ + /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptFullXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); @@ -7643,7 +7643,7 @@ xlog_redo(XLogReaderState *record) ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; LWLockRelease(ControlFileLock); - /* Update shared-memory copy of checkpoint XID/epoch */ + /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptFullXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); @@ -7704,7 +7704,7 @@ xlog_redo(XLogReaderState *record) ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; LWLockRelease(ControlFileLock); - /* Update shared-memory copy of checkpoint XID/epoch */ + /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptFullXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index 2ce9be2cc76..7fdce631fe9 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -260,6 +260,11 @@ XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags) BufferGetTag(buffer, ®buf->rnode, ®buf->forkno, ®buf->block); regbuf->page = BufferGetPage(buffer); regbuf->flags = flags; + if (IsBufferConverted(buffer)) + { + regbuf->flags |= REGBUF_CONVERTED; + MarkBufferConverted(buffer, false); + } regbuf->rdata_tail = (XLogRecData *) 
®buf->rdata_head; regbuf->rdata_len = 0; @@ -575,6 +580,8 @@ XLogRecordAssemble(RmgrId rmid, uint8 info, needs_backup = true; else if (regbuf->flags & REGBUF_NO_IMAGE) needs_backup = false; + else if (regbuf->flags & REGBUF_CONVERTED) + needs_backup = true; else if (!doPageWrites) needs_backup = false; else diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index cf5db23cb86..be00254524c 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -2101,37 +2101,3 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) return true; } - -#ifndef FRONTEND - -/* - * Extract the FullTransactionId from a WAL record. - */ -FullTransactionId -XLogRecGetFullXid(XLogReaderState *record) -{ - TransactionId xid, - next_xid; - uint32 epoch; - - /* - * This function is only safe during replay, because it depends on the - * replay state. See AdvanceNextFullTransactionIdPastXid() for more. - */ - Assert(AmStartupProcess() || !IsUnderPostmaster); - - xid = XLogRecGetXid(record); - next_xid = XidFromFullTransactionId(ShmemVariableCache->nextXid); - epoch = EpochFromFullTransactionId(ShmemVariableCache->nextXid); - - /* - * If xid is numerically greater than next_xid, it has to be from the last - * epoch. - */ - if (unlikely(xid > next_xid)) - --epoch; - - return FullTransactionIdFromEpochAndXid(epoch, xid); -} - -#endif diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 35e54a58658..2269cb4d36c 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -812,7 +812,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, wasShutdown ? 
"true" : "false"))); ereport(DEBUG1, (errmsg_internal("next transaction ID: " UINT64_FORMAT "; next OID: %u", - U64FromFullTransactionId(checkPoint.nextXid), + XidFromFullTransactionId(checkPoint.nextXid), checkPoint.nextOid))); ereport(DEBUG1, (errmsg_internal("next MultiXactId: %llu; next MultiXactOffset: %llu", diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 459ac6fa6f3..2e539b4e0e3 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -120,7 +120,7 @@ static const struct typinfo TypInfo[] = { F_OIDIN, F_OIDOUT}, {"tid", TIDOID, 0, 6, false, TYPALIGN_SHORT, TYPSTORAGE_PLAIN, InvalidOid, F_TIDIN, F_TIDOUT}, - {"xid", XIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, + {"xid", XIDOID, 0, 8, FLOAT8PASSBYVAL, TYPALIGN_XID, TYPSTORAGE_PLAIN, InvalidOid, F_XIDIN, F_XIDOUT}, {"cid", CIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, F_CIDIN, F_CIDOUT}, @@ -252,15 +252,13 @@ BootstrapModeMain(int argc, char *argv[], bool check_only) break; case 'm': { - unsigned long value; - char *endptr; + char *endptr; errno = 0; - value = strtoul(optarg, &endptr, 0); - start_mxid = value; + start_mxid = strtoull(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0 || - value != start_mxid) /* overflow */ + !StartMultiXactIdIsValid(start_mxid)) { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -270,15 +268,13 @@ BootstrapModeMain(int argc, char *argv[], bool check_only) break; case 'o': { - unsigned long value; - char *endptr; + char *endptr; errno = 0; - value = strtoul(optarg, &endptr, 0); - start_mxoff = value; + start_mxoff = strtoull(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0 || - value != start_mxoff) /* overflow */ + !StartMultiXactOffsetIsValid(start_mxoff)) { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -303,15 +299,13 @@ BootstrapModeMain(int argc, char *argv[], bool check_only) break; case 'x': { - unsigned 
long value; - char *endptr; + char *endptr; errno = 0; - value = strtoul(optarg, &endptr, 0); - start_xid = value; + start_xid = strtoull(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0 || - value != start_xid) /* overflow */ + !StartTransactionIdIsValid(start_xid)) { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 1803194db94..ad254ddb086 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -159,8 +159,8 @@ static const FormData_pg_attribute a2 = { .attnum = MinTransactionIdAttributeNumber, .attcacheoff = -1, .atttypmod = -1, - .attbyval = true, - .attalign = TYPALIGN_INT, + .attbyval = FLOAT8PASSBYVAL, + .attalign = TYPALIGN_XID, .attstorage = TYPSTORAGE_PLAIN, .attnotnull = true, .attislocal = true, @@ -187,8 +187,8 @@ static const FormData_pg_attribute a4 = { .attnum = MaxTransactionIdAttributeNumber, .attcacheoff = -1, .atttypmod = -1, - .attbyval = true, - .attalign = TYPALIGN_INT, + .attbyval = FLOAT8PASSBYVAL, + .attalign = TYPALIGN_XID, .attstorage = TYPSTORAGE_PLAIN, .attnotnull = true, .attislocal = true, diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c index 92afbc2f258..3a1eda413f9 100644 --- a/src/backend/catalog/pg_inherits.c +++ b/src/backend/catalog/pg_inherits.c @@ -146,7 +146,7 @@ find_inheritance_children_extended(Oid parentrelId, bool omit_detached, TransactionId xmin; Snapshot snap; - xmin = HeapTupleHeaderGetXmin(inheritsTuple->t_data); + xmin = HeapTupleGetXmin(inheritsTuple); snap = GetActiveSnapshot(); if (!XidInMVCCSnapshot(xmin, snap)) diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 056dca8e478..56295b9aa6a 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -187,7 +187,7 @@ typedef struct AsyncQueueEntry } AsyncQueueEntry; /* Currently, no field of AsyncQueueEntry requires more than int alignment */ -#define QUEUEALIGN(len) 
INTALIGN(len) +#define QUEUEALIGN(len) TYPEALIGN(8, len) #define AsyncQueueEntryEmptySize (offsetof(AsyncQueueEntry, data) + 2) diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index f2691684010..a0a9d51bd3d 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -361,6 +361,7 @@ ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid, tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationRelationId; + HeapTupleCopyBaseFromPage(&tuple, page); /* Skip tuples that are not visible to this snapshot. */ if (HeapTupleSatisfiesVisibility(&tuple, snapshot, buf)) diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index ddf219b21f5..375a23403a3 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -48,6 +48,20 @@ #include "utils/syscache.h" #include "utils/varlena.h" +#define SeqTupleHeaderSetXmin(tup, xid) \ +( \ + (tup)->t_choice.t_heap.t_xmin = (xid) \ +) + +#define SeqTupleHeaderSetXmax(tup, xid) \ +( \ + (tup)->t_choice.t_heap.t_xmax = (xid) \ +) + +#define SeqTupleHeaderGetRawXmax(tup) \ +( \ + (tup)->t_choice.t_heap.t_xmax \ +) /* * We don't want to log each fetching of a value from a sequence, @@ -389,10 +403,10 @@ fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum) * because if the current transaction aborts, no other xact will ever * examine the sequence tuple anyway. 
*/ - HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId); + SeqTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId); HeapTupleHeaderSetXminFrozen(tuple->t_data); HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId); - HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); + SeqTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); tuple->t_data->t_infomask |= HEAP_XMAX_INVALID; ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber); @@ -1224,9 +1238,9 @@ read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) * this again if the update gets lost. */ Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); - if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId) + if (SeqTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId) { - HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId); + SeqTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId); seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED; seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID; MarkBufferDirtyHint(*buf, true); diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 8df25f59d87..fdc0d1ba49f 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -63,13 +63,12 @@ /* * GUC parameters */ -int vacuum_freeze_min_age; -int vacuum_freeze_table_age; -int vacuum_multixact_freeze_min_age; -int vacuum_multixact_freeze_table_age; -int vacuum_failsafe_age; -int vacuum_multixact_failsafe_age; - +int64 vacuum_freeze_min_age; +int64 vacuum_freeze_table_age; +int64 vacuum_multixact_freeze_min_age; +int64 vacuum_multixact_freeze_table_age; +int64 vacuum_failsafe_age; +int64 vacuum_multixact_failsafe_age; /* A few variables that don't seem worth passing around as parameters */ static MemoryContext vac_context = NULL; @@ -957,23 +956,25 @@ get_all_vacuum_rels(int options) */ bool vacuum_set_xid_limits(Relation rel, - int freeze_min_age, - int freeze_table_age, - 
int multixact_freeze_min_age, - int multixact_freeze_table_age, + int64 freeze_min_age, + int64 freeze_table_age, + int64 multixact_freeze_min_age, + int64 multixact_freeze_table_age, TransactionId *oldestXmin, MultiXactId *oldestMxact, TransactionId *freezeLimit, MultiXactId *multiXactCutoff) { - int freezemin; - int mxid_freezemin; - int effective_multixact_freeze_max_age; + int64 freezemin; + int64 mxid_freezemin; + int64 effective_multixact_freeze_max_age; TransactionId limit; TransactionId safeLimit; MultiXactId mxactLimit; MultiXactId safeMxactLimit; - int freezetable; + int64 freezetable; + TransactionId nextXid; + TransactionId nextMxactId; /* * We can always ignore processes running lazy vacuum. This is because we @@ -1022,8 +1023,10 @@ vacuum_set_xid_limits(Relation rel, /* * Compute the cutoff XID, being careful not to generate a "permanent" XID */ - limit = *oldestXmin - freezemin; - if (!TransactionIdIsNormal(limit)) + limit = *oldestXmin; + if (limit > FirstNormalTransactionId + freezemin) + limit -= freezemin; + else limit = FirstNormalTransactionId; /* @@ -1031,8 +1034,10 @@ vacuum_set_xid_limits(Relation rel, * autovacuum_freeze_max_age / 2 XIDs old), complain and force a minimum * freeze age of zero. */ - safeLimit = ReadNextTransactionId() - autovacuum_freeze_max_age; - if (!TransactionIdIsNormal(safeLimit)) + nextXid = ReadNextTransactionId(); + if (nextXid > FirstNormalTransactionId + autovacuum_freeze_max_age) + safeLimit = nextXid - autovacuum_freeze_max_age; + else safeLimit = FirstNormalTransactionId; if (TransactionIdPrecedes(limit, safeLimit)) @@ -1051,7 +1056,7 @@ vacuum_set_xid_limits(Relation rel, * normally autovacuum_multixact_freeze_max_age, but may be less if we are * short of multixact member space. 
*/ - effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold(); + effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age; /* * Determine the minimum multixact freeze age to use: as specified by @@ -1070,20 +1075,30 @@ vacuum_set_xid_limits(Relation rel, *oldestMxact = GetOldestMultiXactId(); /* compute the cutoff multi, being careful to generate a valid value */ - mxactLimit = *oldestMxact - mxid_freezemin; - if (mxactLimit < FirstMultiXactId) + *oldestMxact = GetOldestMultiXactId(); + if (*oldestMxact > FirstMultiXactId + mxid_freezemin) + mxactLimit = *oldestMxact - mxid_freezemin; + else mxactLimit = FirstMultiXactId; - safeMxactLimit = - ReadNextMultiXactId() - effective_multixact_freeze_max_age; - if (safeMxactLimit < FirstMultiXactId) + nextMxactId = ReadNextMultiXactId(); + if (nextMxactId > FirstMultiXactId + effective_multixact_freeze_max_age) + safeMxactLimit = nextMxactId - effective_multixact_freeze_max_age; + else safeMxactLimit = FirstMultiXactId; if (MultiXactIdPrecedes(mxactLimit, safeMxactLimit)) { ereport(WARNING, - (errmsg("oldest multixact is far in the past"), - errhint("Close open transactions with multixacts soon to avoid wraparound problems."))); + (errmsg("oldest multixact is far in the past: %lld %lld %llu %lld %llu %llu %lld ", + (long long) multixact_freeze_min_age, + (long long) vacuum_multixact_freeze_min_age, + (unsigned long long) mxactLimit, + (long long) mxid_freezemin, + (unsigned long long) *oldestMxact, + (unsigned long long) safeMxactLimit, + (long long) effective_multixact_freeze_max_age), + errhint("Close open transactions with multixacts soon to enable SLRU truncation."))); /* Use the safe limit, unless an older mxact is still running */ if (MultiXactIdPrecedes(*oldestMxact, safeMxactLimit)) mxactLimit = *oldestMxact; @@ -1113,8 +1128,10 @@ vacuum_set_xid_limits(Relation rel, * Compute XID limit causing an aggressive vacuum, being careful not to * generate a "permanent" XID */ - limit = 
ReadNextTransactionId() - freezetable; - if (!TransactionIdIsNormal(limit)) + limit = ReadNextTransactionId(); + if (limit > FirstNormalTransactionId + freezetable) + limit -= freezetable; + else limit = FirstNormalTransactionId; if (TransactionIdPrecedesOrEquals(rel->rd_rel->relfrozenxid, limit)) @@ -1139,8 +1156,10 @@ vacuum_set_xid_limits(Relation rel, * Compute MultiXact limit causing an aggressive vacuum, being careful to * generate a valid MultiXact value */ - mxactLimit = ReadNextMultiXactId() - freezetable; - if (mxactLimit < FirstMultiXactId) + mxactLimit = ReadNextMultiXactId(); + if (mxactLimit > FirstMultiXactId + freezetable) + mxactLimit -= freezetable; + else mxactLimit = FirstMultiXactId; if (MultiXactIdPrecedesOrEquals(rel->rd_rel->relminmxid, mxactLimit)) @@ -1407,6 +1426,9 @@ vac_update_relstats(Relation relation, futurexid = false; if (frozenxid_updated) *frozenxid_updated = false; + + Assert(TransactionIdPrecedesOrEquals(frozenxid, ReadNextTransactionId())); + if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid) { bool update = false; @@ -1430,6 +1452,9 @@ vac_update_relstats(Relation relation, futuremxid = false; if (minmulti_updated) *minmulti_updated = false; + + Assert(MultiXactIdPrecedesOrEquals(minmulti, ReadNextMultiXactId())); + if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti) { bool update = false; @@ -1457,14 +1482,16 @@ vac_update_relstats(Relation relation, if (futurexid) ereport(WARNING, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("overwrote invalid relfrozenxid value %u with new value %u for table \"%s\"", - oldfrozenxid, frozenxid, + errmsg_internal("overwrote invalid relfrozenxid value %llu with new value %llu for table \"%s\"", + (unsigned long long) oldfrozenxid, + (unsigned long long) frozenxid, RelationGetRelationName(relation)))); if (futuremxid) ereport(WARNING, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("overwrote invalid relminmxid value %u with new value %u for table \"%s\"", - 
oldminmulti, minmulti, + errmsg_internal("overwrote invalid relminmxid value %llu with new value %llu for table \"%s\"", + (unsigned long long) oldminmulti, + (unsigned long long) minmulti, RelationGetRelationName(relation)))); } diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index e44ad68cda1..2d4b8874024 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -3186,6 +3186,7 @@ ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op, ExprContext *econte tmptup.t_len = HeapTupleHeaderGetDatumLength(tuphdr); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tmptup); tmptup.t_data = tuphdr; heap_deform_tuple(&tmptup, tupDesc, diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 9df1f81ea89..d8c92f48469 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -1070,6 +1070,7 @@ GetAttributeByName(HeapTupleHeader tuple, const char *attname, bool *isNull) tmptup.t_len = HeapTupleHeaderGetDatumLength(tuple); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tmptup); tmptup.t_data = tuple; result = heap_getattr(&tmptup, diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index a49c3da5b6c..17bbc487623 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -3677,6 +3677,7 @@ ExecModifyTable(PlanState *pstate) HeapTupleHeaderGetDatumLength(oldtupdata.t_data); ItemPointerSetInvalid(&(oldtupdata.t_self)); /* Historically, view triggers see invalid t_tableOid. */ + HeapTupleCopyHeaderXids(&oldtupdata); oldtupdata.t_tableOid = (relkind == RELKIND_VIEW) ? 
InvalidOid : RelationGetRelid(resultRelInfo->ri_RelationDesc); diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 29bc26669b0..fb03e6cc4b2 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -1154,6 +1154,7 @@ SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, int *attnum, mtuple->t_data->t_ctid = tuple->t_data->t_ctid; mtuple->t_self = tuple->t_self; mtuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyBase(mtuple, tuple); } else { diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 51d630fa892..e4c21cb9347 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -6021,6 +6021,7 @@ copyObjectImpl(const void *from) * perform a shallow copy via list_copy() */ case T_IntList: + case T_Int64List: case T_OidList: retval = list_copy(from); break; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index e747e1667d0..0ebf78d5295 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -3490,6 +3490,13 @@ _equalList(const List *a, const List *b) return false; } break; + case T_Int64List: + forboth(item_a, a, item_b, b) + { + if (lfirst_int64(item_a) != lfirst_int64(item_b)) + return false; + } + break; case T_OidList: forboth(item_a, a, item_b, b) { @@ -3808,6 +3815,7 @@ equal(const void *a, const void *b) case T_List: case T_IntList: + case T_Int64List: case T_OidList: retval = _equalList(a, b); break; diff --git a/src/backend/nodes/list.c b/src/backend/nodes/list.c index f843f861ef8..91f080cdca2 100644 --- a/src/backend/nodes/list.c +++ b/src/backend/nodes/list.c @@ -53,6 +53,7 @@ */ #define IsPointerList(l) ((l) == NIL || IsA((l), List)) #define IsIntegerList(l) ((l) == NIL || IsA((l), IntList)) +#define IsInteger64List(l) ((l) == NIL || IsA((l), Int64List)) #define IsOidList(l) ((l) == NIL || IsA((l), OidList)) #ifdef USE_ASSERT_CHECKING @@ -71,6 +72,7 @@ check_list_invariants(const List *list) 
Assert(list->type == T_List || list->type == T_IntList || + list->type == T_Int64List || list->type == T_OidList); } #else @@ -365,6 +367,24 @@ lappend_int(List *list, int datum) return list; } +/* + * Append an integer to the specified list. See lappend() + */ +List * +lappend_int64(List *list, int64 datum) +{ + Assert(IsInteger64List(list)); + + if (list == NIL) + list = new_list(T_Int64List, 1); + else + new_tail_cell(list); + + llast_int64(list) = datum; + check_list_invariants(list); + return list; +} + /* * Append an OID to the specified list. See lappend() */ diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index ce129155925..5e728c68e0d 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -219,6 +219,8 @@ _outList(StringInfo str, const List *node) if (IsA(node, IntList)) appendStringInfoChar(str, 'i'); + else if (IsA(node, Int64List)) + appendStringInfoChar(str, 'I'); else if (IsA(node, OidList)) appendStringInfoChar(str, 'o'); @@ -237,6 +239,8 @@ _outList(StringInfo str, const List *node) } else if (IsA(node, IntList)) appendStringInfo(str, " %d", lfirst_int(lc)); + else if (IsA(node, Int64List)) + appendStringInfo(str, " %lld", (long long) lfirst_int64(lc)); else if (IsA(node, OidList)) appendStringInfo(str, " %u", lfirst_oid(lc)); else @@ -2929,8 +2933,8 @@ _outIndexStmt(StringInfo str, const IndexStmt *node) WRITE_STRING_FIELD(idxcomment); WRITE_OID_FIELD(indexOid); WRITE_OID_FIELD(oldNode); - WRITE_UINT_FIELD(oldCreateSubid); - WRITE_UINT_FIELD(oldFirstRelfilenodeSubid); + WRITE_UINT64_FIELD(oldCreateSubid); + WRITE_UINT64_FIELD(oldFirstRelfilenodeSubid); WRITE_BOOL_FIELD(unique); WRITE_BOOL_FIELD(nulls_not_distinct); WRITE_BOOL_FIELD(primary); @@ -4033,7 +4037,7 @@ outNode(StringInfo str, const void *obj) if (obj == NULL) appendStringInfoString(str, "<>"); - else if (IsA(obj, List) || IsA(obj, IntList) || IsA(obj, OidList)) + else if (IsA(obj, List) || IsA(obj, IntList) || IsA(obj, Int64List) || 
IsA(obj, OidList)) _outList(str, obj); /* nodeRead does not want to see { } around these! */ else if (IsA(obj, Integer)) diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 5012bfe1425..15ab82b3845 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -228,7 +228,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, * src/backend/access/heap/README.HOT for discussion. */ if (index->indcheckxmin && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRelation->rd_indextuple->t_data), + !TransactionIdPrecedes(HeapTupleGetXmin(indexRelation->rd_indextuple), TransactionXmin)) { root->glob->transientPlan = true; diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 2e146aac93b..9b78e4e5fbf 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -122,8 +122,8 @@ int autovacuum_vac_ins_thresh; double autovacuum_vac_ins_scale; int autovacuum_anl_thresh; double autovacuum_anl_scale; -int autovacuum_freeze_max_age; -int autovacuum_multixact_freeze_max_age; +int64 autovacuum_freeze_max_age; +int64 autovacuum_multixact_freeze_max_age; double autovacuum_vac_cost_delay; int autovacuum_vac_cost_limit; @@ -146,10 +146,10 @@ static TransactionId recentXid; static MultiXactId recentMulti; /* Default freeze ages to use for autovacuum (varies by database) */ -static int default_freeze_min_age; -static int default_freeze_table_age; -static int default_multixact_freeze_min_age; -static int default_multixact_freeze_table_age; +static int64 default_freeze_min_age; +static int64 default_freeze_table_age; +static int64 default_multixact_freeze_min_age; +static int64 default_multixact_freeze_table_age; /* Memory context for long-lived data */ static MemoryContext AutovacMemCxt; @@ -325,15 +325,15 @@ static void FreeWorkerInfo(int code, Datum arg); static autovac_table *table_recheck_autovac(Oid relid, HTAB 
*table_toast_map, TupleDesc pg_class_desc, - int effective_multixact_freeze_max_age); + int64 effective_multixact_freeze_max_age); static void recheck_relation_needs_vacanalyze(Oid relid, AutoVacOpts *avopts, Form_pg_class classForm, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound); static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound); static void autovacuum_do_vac_analyze(autovac_table *tab, @@ -1149,6 +1149,7 @@ do_start_worker(void) ListCell *cell; TransactionId xidForceLimit; MultiXactId multiForceLimit; + int64 multiMembersThreshold; bool for_xid_wrap; bool for_multi_wrap; avw_dbase *avdb; @@ -1185,17 +1186,18 @@ do_start_worker(void) * particular tables, but not loosened.) */ recentXid = ReadNextTransactionId(); - xidForceLimit = recentXid - autovacuum_freeze_max_age; - /* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */ - /* this can cause the limit to go backwards by 3, but that's OK */ - if (xidForceLimit < FirstNormalTransactionId) - xidForceLimit -= FirstNormalTransactionId; + if (recentXid > FirstNormalTransactionId + autovacuum_freeze_max_age) + xidForceLimit = recentXid - autovacuum_freeze_max_age; + else + xidForceLimit = FirstNormalTransactionId; /* Also determine the oldest datminmxid we will consider. 
*/ recentMulti = ReadNextMultiXactId(); - multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold(); - if (multiForceLimit < FirstMultiXactId) - multiForceLimit -= FirstMultiXactId; + multiMembersThreshold = autovacuum_multixact_freeze_max_age; + if (recentMulti > FirstMultiXactId + multiMembersThreshold) + multiForceLimit = recentMulti - multiMembersThreshold; + else + multiForceLimit = FirstMultiXactId; /* * Choose a database to connect to. We pick the database that was least @@ -1964,7 +1966,7 @@ do_autovacuum(void) BufferAccessStrategy bstrategy; ScanKeyData key; TupleDesc pg_class_desc; - int effective_multixact_freeze_max_age; + int64 effective_multixact_freeze_max_age; bool did_vacuum = false; bool found_concurrent_worker = false; int i; @@ -1987,7 +1989,7 @@ do_autovacuum(void) * normally autovacuum_multixact_freeze_max_age, but may be less if we are * short of multixact member space. */ - effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold(); + effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age; /* * Find the pg_database entry and select the default freeze ages. 
We use @@ -2753,7 +2755,7 @@ extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc) static autovac_table * table_recheck_autovac(Oid relid, HTAB *table_toast_map, TupleDesc pg_class_desc, - int effective_multixact_freeze_max_age) + int64 effective_multixact_freeze_max_age) { Form_pg_class classForm; HeapTuple classTup; @@ -2792,10 +2794,10 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, /* OK, it needs something done */ if (doanalyze || dovacuum) { - int freeze_min_age; - int freeze_table_age; - int multixact_freeze_min_age; - int multixact_freeze_table_age; + int64 freeze_min_age; + int64 freeze_table_age; + int64 multixact_freeze_min_age; + int64 multixact_freeze_table_age; int vac_cost_limit; double vac_cost_delay; int log_min_duration; @@ -2900,7 +2902,7 @@ static void recheck_relation_needs_vacanalyze(Oid relid, AutoVacOpts *avopts, Form_pg_class classForm, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound) @@ -2962,7 +2964,7 @@ relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, /* output params below */ bool *dovacuum, bool *doanalyze, @@ -2991,8 +2993,8 @@ relation_needs_vacanalyze(Oid relid, anltuples; /* freeze parameters */ - int freeze_max_age; - int multixact_freeze_max_age; + int64 freeze_max_age; + int64 multixact_freeze_max_age; TransactionId xidForceLimit; MultiXactId multiForceLimit; @@ -3042,17 +3044,19 @@ relation_needs_vacanalyze(Oid relid, av_enabled = (relopts ? 
relopts->enabled : true); /* Force vacuum if table is at risk of wraparound */ - xidForceLimit = recentXid - freeze_max_age; - if (xidForceLimit < FirstNormalTransactionId) - xidForceLimit -= FirstNormalTransactionId; + if (recentXid > FirstNormalTransactionId + freeze_max_age) + xidForceLimit = recentXid - freeze_max_age; + else + xidForceLimit = FirstNormalTransactionId; force_vacuum = (TransactionIdIsNormal(classForm->relfrozenxid) && TransactionIdPrecedes(classForm->relfrozenxid, xidForceLimit)); if (!force_vacuum) { - multiForceLimit = recentMulti - multixact_freeze_max_age; - if (multiForceLimit < FirstMultiXactId) - multiForceLimit -= FirstMultiXactId; + if (recentMulti > FirstMultiXactId + multixact_freeze_max_age) + multiForceLimit = recentMulti - multixact_freeze_max_age; + else + multiForceLimit = FirstMultiXactId; force_vacuum = MultiXactIdIsValid(classForm->relminmxid) && MultiXactIdPrecedes(classForm->relminmxid, multiForceLimit); } diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index aa2427ba73f..d42ab3a8fe6 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -846,8 +846,12 @@ DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) xl_heap_insert *xlrec; ReorderBufferChange *change; RelFileNode target_node; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(r); - xlrec = (xl_heap_insert *) XLogRecGetData(r); + if (isinit) + rec_data += sizeof(TransactionId); + xlrec = (xl_heap_insert *) rec_data; /* * Ignore insert records without new tuples (this does happen when @@ -903,8 +907,12 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) ReorderBufferChange *change; char *data; RelFileNode target_node; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(r); - xlrec = (xl_heap_update *) XLogRecGetData(r); + if 
(isinit) + rec_data += sizeof(TransactionId); + xlrec = (xl_heap_update *) rec_data; /* only interested in our database */ XLogRecGetBlockTag(r, 0, &target_node, NULL, NULL); @@ -1064,8 +1072,12 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) char *tupledata; Size tuplelen; RelFileNode rnode; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(r); - xlrec = (xl_heap_multi_insert *) XLogRecGetData(r); + if (isinit) + rec_data += sizeof(TransactionId); + xlrec = (xl_heap_multi_insert *) rec_data; /* * Ignore insert records without new tuples. This happens when a diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c index ff8513e2d29..c1c4adad9dd 100644 --- a/src/backend/replication/logical/proto.c +++ b/src/backend/replication/logical/proto.c @@ -64,7 +64,7 @@ logicalrep_write_begin(StringInfo out, ReorderBufferTXN *txn) /* fixed fields */ pq_sendint64(out, txn->final_lsn); pq_sendint64(out, txn->xact_time.commit_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); } /* @@ -78,7 +78,7 @@ logicalrep_read_begin(StringInfo in, LogicalRepBeginData *begin_data) if (begin_data->final_lsn == InvalidXLogRecPtr) elog(ERROR, "final_lsn not set in begin message"); begin_data->committime = pq_getmsgint64(in); - begin_data->xid = pq_getmsgint(in, 4); + begin_data->xid = pq_getmsgint64(in); } @@ -132,7 +132,7 @@ logicalrep_write_begin_prepare(StringInfo out, ReorderBufferTXN *txn) pq_sendint64(out, txn->final_lsn); pq_sendint64(out, txn->end_lsn); pq_sendint64(out, txn->xact_time.prepare_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); /* send gid */ pq_sendstring(out, txn->gid); @@ -152,7 +152,7 @@ logicalrep_read_begin_prepare(StringInfo in, LogicalRepPreparedTxnData *begin_da if (begin_data->end_lsn == InvalidXLogRecPtr) elog(ERROR, "end_lsn not set in begin prepare message"); begin_data->prepare_time = 
pq_getmsgint64(in); - begin_data->xid = pq_getmsgint(in, 4); + begin_data->xid = pq_getmsgint64(in); /* read gid (copy it into a pre-allocated buffer) */ strlcpy(begin_data->gid, pq_getmsgstring(in), sizeof(begin_data->gid)); @@ -185,7 +185,7 @@ logicalrep_write_prepare_common(StringInfo out, LogicalRepMsgType type, pq_sendint64(out, prepare_lsn); pq_sendint64(out, txn->end_lsn); pq_sendint64(out, txn->xact_time.prepare_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); /* send gid */ pq_sendstring(out, txn->gid); @@ -224,7 +224,7 @@ logicalrep_read_prepare_common(StringInfo in, char *msgtype, if (prepare_data->end_lsn == InvalidXLogRecPtr) elog(ERROR, "end_lsn is not set in %s message", msgtype); prepare_data->prepare_time = pq_getmsgint64(in); - prepare_data->xid = pq_getmsgint(in, 4); + prepare_data->xid = pq_getmsgint64(in); if (prepare_data->xid == InvalidTransactionId) elog(ERROR, "invalid two-phase transaction ID in %s message", msgtype); @@ -265,7 +265,7 @@ logicalrep_write_commit_prepared(StringInfo out, ReorderBufferTXN *txn, pq_sendint64(out, commit_lsn); pq_sendint64(out, txn->end_lsn); pq_sendint64(out, txn->xact_time.commit_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); /* send gid */ pq_sendstring(out, txn->gid); @@ -291,7 +291,7 @@ logicalrep_read_commit_prepared(StringInfo in, LogicalRepCommitPreparedTxnData * if (prepare_data->end_lsn == InvalidXLogRecPtr) elog(ERROR, "end_lsn is not set in commit prepared message"); prepare_data->commit_time = pq_getmsgint64(in); - prepare_data->xid = pq_getmsgint(in, 4); + prepare_data->xid = pq_getmsgint64(in); /* read gid (copy it into a pre-allocated buffer) */ strlcpy(prepare_data->gid, pq_getmsgstring(in), sizeof(prepare_data->gid)); @@ -323,7 +323,7 @@ logicalrep_write_rollback_prepared(StringInfo out, ReorderBufferTXN *txn, pq_sendint64(out, txn->end_lsn); pq_sendint64(out, prepare_time); pq_sendint64(out, txn->xact_time.commit_time); - pq_sendint32(out, txn->xid); 
+ pq_sendint64(out, txn->xid); /* send gid */ pq_sendstring(out, txn->gid); @@ -351,7 +351,7 @@ logicalrep_read_rollback_prepared(StringInfo in, elog(ERROR, "rollback_end_lsn is not set in rollback prepared message"); rollback_data->prepare_time = pq_getmsgint64(in); rollback_data->rollback_time = pq_getmsgint64(in); - rollback_data->xid = pq_getmsgint(in, 4); + rollback_data->xid = pq_getmsgint64(in); /* read gid (copy it into a pre-allocated buffer) */ strlcpy(rollback_data->gid, pq_getmsgstring(in), sizeof(rollback_data->gid)); @@ -418,7 +418,7 @@ logicalrep_write_insert(StringInfo out, TransactionId xid, Relation rel, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); /* use Oid as relation identifier */ pq_sendint32(out, RelationGetRelid(rel)); @@ -467,7 +467,7 @@ logicalrep_write_update(StringInfo out, TransactionId xid, Relation rel, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); /* use Oid as relation identifier */ pq_sendint32(out, RelationGetRelid(rel)); @@ -541,7 +541,7 @@ logicalrep_write_delete(StringInfo out, TransactionId xid, Relation rel, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); /* use Oid as relation identifier */ pq_sendint32(out, RelationGetRelid(rel)); @@ -595,7 +595,7 @@ logicalrep_write_truncate(StringInfo out, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); pq_sendint32(out, nrelids); @@ -653,7 +653,7 @@ logicalrep_write_message(StringInfo out, TransactionId xid, XLogRecPtr lsn, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); pq_sendint8(out, flags); pq_sendint64(out, lsn); @@ -675,7 
+675,7 @@ logicalrep_write_rel(StringInfo out, TransactionId xid, Relation rel, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); /* use Oid as relation identifier */ pq_sendint32(out, RelationGetRelid(rel)); @@ -731,7 +731,7 @@ logicalrep_write_typ(StringInfo out, TransactionId xid, Oid typoid) /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(basetypoid)); if (!HeapTupleIsValid(tup)) @@ -1079,7 +1079,7 @@ logicalrep_write_stream_start(StringInfo out, Assert(TransactionIdIsValid(xid)); /* transaction ID (we're starting to stream, so must be valid) */ - pq_sendint32(out, xid); + pq_sendint64(out, xid); /* 1 if this is the first streaming segment for this xid */ pq_sendbyte(out, first_segment ? 1 : 0); @@ -1095,7 +1095,7 @@ logicalrep_read_stream_start(StringInfo in, bool *first_segment) Assert(first_segment); - xid = pq_getmsgint(in, 4); + xid = pq_getmsgint64(in); *first_segment = (pq_getmsgbyte(in) == 1); return xid; @@ -1124,7 +1124,7 @@ logicalrep_write_stream_commit(StringInfo out, ReorderBufferTXN *txn, Assert(TransactionIdIsValid(txn->xid)); /* transaction ID */ - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); /* send the flags field (unused for now) */ pq_sendbyte(out, flags); @@ -1144,7 +1144,7 @@ logicalrep_read_stream_commit(StringInfo in, LogicalRepCommitData *commit_data) TransactionId xid; uint8 flags; - xid = pq_getmsgint(in, 4); + xid = pq_getmsgint64(in); /* read flags (unused for now) */ flags = pq_getmsgbyte(in); @@ -1173,8 +1173,8 @@ logicalrep_write_stream_abort(StringInfo out, TransactionId xid, Assert(TransactionIdIsValid(xid) && TransactionIdIsValid(subxid)); /* transaction ID */ - pq_sendint32(out, xid); - pq_sendint32(out, subxid); + pq_sendint64(out, xid); + pq_sendint64(out, subxid); } /* @@ -1186,8 
+1186,8 @@ logicalrep_read_stream_abort(StringInfo in, TransactionId *xid, { Assert(xid && subxid); - *xid = pq_getmsgint(in, 4); - *subxid = pq_getmsgint(in, 4); + *xid = pq_getmsgint64(in); + *subxid = pq_getmsgint64(in); } /* diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index b30372559c8..7abfece37f3 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -5031,8 +5031,12 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot) TransactionId f_mapped_xid; TransactionId f_create_xid; XLogRecPtr f_lsn; - uint32 f_hi, - f_lo; + uint32 f_lsn_hi, + f_lsn_lo, + f_mapped_xid_hi, + f_mapped_xid_lo, + f_create_xid_hi, + f_create_xid_lo; RewriteMappingFile *f; if (strcmp(mapping_de->d_name, ".") == 0 || @@ -5044,11 +5048,14 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot) continue; if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, - &f_dboid, &f_relid, &f_hi, &f_lo, - &f_mapped_xid, &f_create_xid) != 6) + &f_dboid, &f_relid, &f_lsn_hi, &f_lsn_lo, + &f_mapped_xid_hi, &f_mapped_xid_lo, + &f_create_xid_hi, &f_create_xid_lo) != 8) elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name); - f_lsn = ((uint64) f_hi) << 32 | f_lo; + f_lsn = ((uint64) f_lsn_hi) << 32 | f_lsn_lo; + f_mapped_xid = ((uint64) f_mapped_xid_hi) << 32 | f_mapped_xid_lo; + f_create_xid = ((uint64) f_create_xid_hi) << 32 | f_create_xid_lo; /* mapping for another database */ if (f_dboid != dboid) diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index e9188171070..af9c3078334 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -449,7 +449,7 @@ handle_streamed_transaction(LogicalRepMsgType action, StringInfo s) * We should have received XID of the subxact as the first part of the * message, so extract it. 
*/ - xid = pq_getmsgint(s, 4); + xid = pq_getmsgint64(s); if (!TransactionIdIsValid(xid)) ereport(ERROR, diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c index 8deae571433..d5a8a6be544 100644 --- a/src/backend/replication/pgoutput/pgoutput.c +++ b/src/backend/replication/pgoutput/pgoutput.c @@ -565,7 +565,8 @@ pgoutput_commit_txn(LogicalDecodingContext *ctx, ReorderBufferTXN *txn, if (!sent_begin_txn) { - elog(DEBUG1, "skipped replication of an empty transaction with XID: %u", txn->xid); + elog(DEBUG1, "skipped replication of an empty transaction with XID: %llu", + (unsigned long long) txn->xid); return; } @@ -1927,7 +1928,7 @@ get_schema_sent_in_streamed_txn(RelationSyncEntry *entry, TransactionId xid) foreach(lc, entry->streamed_txns) { - if (xid == (uint32) lfirst_int(lc)) + if (xid == (uint64) lfirst_int64(lc)) return true; } @@ -1945,7 +1946,7 @@ set_schema_sent_in_streamed_txn(RelationSyncEntry *entry, TransactionId xid) oldctx = MemoryContextSwitchTo(CacheMemoryContext); - entry->streamed_txns = lappend_int(entry->streamed_txns, xid); + entry->streamed_txns = lappend_int64(entry->streamed_txns, xid); MemoryContextSwitchTo(oldctx); } diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 7624ceb5f91..452a1fffd91 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -1143,10 +1143,6 @@ static void XLogWalRcvSendHSFeedback(bool immed) { TimestampTz now; - FullTransactionId nextFullXid; - TransactionId nextXid; - uint32 xmin_epoch, - catalog_xmin_epoch; TransactionId xmin, catalog_xmin; static TimestampTz sendTime = 0; @@ -1203,31 +1199,15 @@ XLogWalRcvSendHSFeedback(bool immed) catalog_xmin = InvalidTransactionId; } - /* - * Get epoch and adjust if nextXid and oldestXmin are different sides of - * the epoch boundary. 
- */ - nextFullXid = ReadNextFullTransactionId(); - nextXid = XidFromFullTransactionId(nextFullXid); - xmin_epoch = EpochFromFullTransactionId(nextFullXid); - catalog_xmin_epoch = xmin_epoch; - if (nextXid < xmin) - xmin_epoch--; - if (nextXid < catalog_xmin) - catalog_xmin_epoch--; - - elog(DEBUG2, "sending hot standby feedback xmin %llu epoch %u catalog_xmin %llu catalog_xmin_epoch %u", - (unsigned long long) xmin, xmin_epoch, - (unsigned long long) catalog_xmin, catalog_xmin_epoch); + elog(DEBUG2, "sending hot standby feedback xmin %llu catalog_xmin %llu", + (unsigned long long) xmin, (unsigned long long) catalog_xmin); /* Construct the message and send it. */ resetStringInfo(&reply_message); pq_sendbyte(&reply_message, 'h'); pq_sendint64(&reply_message, GetCurrentTimestamp()); - pq_sendint32(&reply_message, xmin); - pq_sendint32(&reply_message, xmin_epoch); - pq_sendint32(&reply_message, catalog_xmin); - pq_sendint32(&reply_message, catalog_xmin_epoch); + pq_sendint64(&reply_message, xmin); + pq_sendint64(&reply_message, catalog_xmin); walrcv_send(wrconn, reply_message.data, reply_message.len); if (TransactionIdIsValid(xmin) || TransactionIdIsValid(catalog_xmin)) primary_has_standby_xmin = true; diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 3abad5cb651..197567ff7db 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -255,7 +255,6 @@ static void WalSndUpdateProgress(LogicalDecodingContext *ctx, XLogRecPtr lsn, Tr static XLogRecPtr WalSndWaitForWal(XLogRecPtr loc); static void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time); static TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now); -static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch); static void WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID *tli_p); @@ -2216,44 +2215,6 @@ PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin, TransactionId 
feedbac } } -/* - * Check that the provided xmin/epoch are sane, that is, not in the future - * and not so far back as to be already wrapped around. - * - * Epoch of nextXid should be same as standby, or if the counter has - * wrapped, then one greater than standby. - * - * This check doesn't care about whether clog exists for these xids - * at all. - */ -static bool -TransactionIdInRecentPast(TransactionId xid, uint32 epoch) -{ - FullTransactionId nextFullXid; - TransactionId nextXid; - uint32 nextEpoch; - - nextFullXid = ReadNextFullTransactionId(); - nextXid = XidFromFullTransactionId(nextFullXid); - nextEpoch = EpochFromFullTransactionId(nextFullXid); - - if (xid <= nextXid) - { - if (epoch != nextEpoch) - return false; - } - else - { - if (epoch + 1 != nextEpoch) - return false; - } - - if (!TransactionIdPrecedesOrEquals(xid, nextXid)) - return false; /* epoch OK, but it's wrapped around */ - - return true; -} - /* * Hot Standby feedback */ @@ -2261,9 +2222,7 @@ static void ProcessStandbyHSFeedbackMessage(void) { TransactionId feedbackXmin; - uint32 feedbackEpoch; TransactionId feedbackCatalogXmin; - uint32 feedbackCatalogEpoch; TimestampTz replyTime; /* @@ -2272,10 +2231,8 @@ ProcessStandbyHSFeedbackMessage(void) * of this message. 
*/ replyTime = pq_getmsgint64(&reply_message); - feedbackXmin = pq_getmsgint(&reply_message, 4); - feedbackEpoch = pq_getmsgint(&reply_message, 4); - feedbackCatalogXmin = pq_getmsgint(&reply_message, 4); - feedbackCatalogEpoch = pq_getmsgint(&reply_message, 4); + feedbackXmin = pq_getmsgint64(&reply_message); + feedbackCatalogXmin = pq_getmsgint64(&reply_message); if (message_level_is_interesting(DEBUG2)) { @@ -2284,11 +2241,9 @@ ProcessStandbyHSFeedbackMessage(void) /* Copy because timestamptz_to_str returns a static buffer */ replyTimeStr = pstrdup(timestamptz_to_str(replyTime)); - elog(DEBUG2, "hot standby feedback xmin %llu epoch %u, catalog_xmin %llu epoch %u reply_time %s", + elog(DEBUG2, "hot standby feedback xmin %llu, catalog_xmin %llu reply_time %s", (unsigned long long) feedbackXmin, - feedbackEpoch, (unsigned long long) feedbackCatalogXmin, - feedbackCatalogEpoch, replyTimeStr); pfree(replyTimeStr); @@ -2319,18 +2274,6 @@ ProcessStandbyHSFeedbackMessage(void) return; } - /* - * Check that the provided xmin/epoch are sane, that is, not in the future - * and not so far back as to be already wrapped around. Ignore if not. 
- */ - if (TransactionIdIsNormal(feedbackXmin) && - !TransactionIdInRecentPast(feedbackXmin, feedbackEpoch)) - return; - - if (TransactionIdIsNormal(feedbackCatalogXmin) && - !TransactionIdInRecentPast(feedbackCatalogXmin, feedbackCatalogEpoch)) - return; - /* * Set the WalSender's xmin equal to the standby's requested xmin, so that * the xmin will be taken into account by GetSnapshotData() / diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index ca48395d5c5..804d95a2852 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -2431,6 +2431,7 @@ statext_expressions_load(Oid stxoid, bool inh, int idx) ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; tmptup.t_data = td; + HeapTupleCopyHeaderXids(&tmptup); tup = heap_copytuple(&tmptup); diff --git a/src/backend/storage/buffer/Makefile b/src/backend/storage/buffer/Makefile index fd7c40dcb08..ffcc0fc290e 100644 --- a/src/backend/storage/buffer/Makefile +++ b/src/backend/storage/buffer/Makefile @@ -17,6 +17,7 @@ OBJS = \ buf_table.o \ bufmgr.o \ freelist.o \ - localbuf.o + localbuf.o \ + heap_convert.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index ae13011d275..8a44b7b8924 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -459,7 +459,7 @@ ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref) ) -static Buffer ReadBuffer_common(SMgrRelation reln, char relpersistence, +static Buffer ReadBuffer_common(Relation rel, SMgrRelation reln, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit); @@ -767,7 +767,8 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, * miss. 
*/ pgstat_count_buffer_read(reln); - buf = ReadBuffer_common(RelationGetSmgr(reln), reln->rd_rel->relpersistence, + buf = ReadBuffer_common(reln, RelationGetSmgr(reln), + reln->rd_rel->relpersistence, forkNum, blockNum, mode, strategy, &hit); if (hit) pgstat_count_buffer_hit(reln); @@ -794,7 +795,9 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, SMgrRelation smgr = smgropen(rnode, InvalidBackendId); - return ReadBuffer_common(smgr, permanent ? RELPERSISTENCE_PERMANENT : + //Assert(InRecovery); + + return ReadBuffer_common(NULL, smgr, permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED, forkNum, blockNum, mode, strategy, &hit); } @@ -806,7 +809,7 @@ ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum, * *hit is set to true if the request was satisfied from shared buffer cache. */ static Buffer -ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, +ReadBuffer_common(Relation rel, SMgrRelation smgr, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit) { @@ -1038,6 +1041,18 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, blockNum, relpath(smgr->smgr_rnode, forkNum)))); } + + if (PageGetPageLayoutVersion(bufBlock) != PG_PAGE_LAYOUT_VERSION && + !PageIsNew((Page) bufBlock)) + { + Buffer buf = BufferDescriptorGetBuffer(bufHdr); + + LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE); + /* Check for no concurrent changes */ + if (PageGetPageLayoutVersion(bufBlock) != PG_PAGE_LAYOUT_VERSION) + convert_page(rel, bufBlock, buf, blockNum); + LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); + } } } @@ -4115,6 +4130,64 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std) } } +/* + * Mark buffer as converted - ie its format is changed without logical changes. + * + * It will override `full_page_write` GUC setting in XLogRecordAssemble. 
+ */ +void +MarkBufferConverted(Buffer buffer, bool converted) +{ + BufferDesc *bufHdr; + uint32 buf_state; + bool has_mark; + + if (!BufferIsValid(buffer)) + elog(ERROR, "bad buffer ID: %d", buffer); + + Assert(!BufferIsLocal(buffer)); + + bufHdr = GetBufferDescriptor(buffer - 1); + + Assert(GetPrivateRefCount(buffer) > 0); + if (converted) + { + /* here, either share or exclusive lock is OK */ + Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr))); + } + + buf_state = pg_atomic_read_u32(&bufHdr->state); + has_mark = (buf_state & BM_CONVERTED) != 0; + if (converted == has_mark) + return; + + buf_state = LockBufHdr(bufHdr); + buf_state &= ~BM_CONVERTED; + if (converted) + buf_state |= BM_CONVERTED; + UnlockBufHdr(bufHdr, buf_state); +} + +bool +IsBufferConverted(Buffer buffer) +{ + + BufferDesc *bufHdr; + uint32 buf_state; + + if (!BufferIsValid(buffer)) + elog(ERROR, "bad buffer ID: %d", buffer); + + Assert(!BufferIsLocal(buffer)); + + bufHdr = GetBufferDescriptor(buffer - 1); + + Assert(GetPrivateRefCount(buffer) > 0); + + buf_state = pg_atomic_read_u32(&bufHdr->state); + return (buf_state & BM_CONVERTED) != 0; +} + /* * Release buffer content locks for shared buffers. 
* diff --git a/src/backend/storage/buffer/heap_convert.c b/src/backend/storage/buffer/heap_convert.c new file mode 100644 index 00000000000..7c4daca3700 --- /dev/null +++ b/src/backend/storage/buffer/heap_convert.c @@ -0,0 +1,516 @@ +/*------------------------------------------------------------------------- + * + * heap_convert.c + * Heap page converter from 32bit to 64bit xid format + * + * Copyright (c) 2015-2022, Postgres Professional + * + * IDENTIFICATION + * src/backend/storage/buffer/heap_convert.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/generic_xlog.h" +#include "access/heapam.h" +#include "access/multixact.h" +#include "storage/bufmgr.h" +#include "storage/checksum.h" + +static void repack_heap_tuples(Relation rel, Page page, Buffer buf, + BlockNumber blkno, bool double_xmax); + +/* + * itemoffcompare + * Sorting support for repack_tuples() + */ +int +itemoffcompare(const void *item1, const void *item2) +{ + /* Sort in decreasing itemoff order */ + return ((ItemIdCompactData *) item2)->itemoff - + ((ItemIdCompactData *) item1)->itemoff; +} + +/* + * Lazy page conversion from 32-bit to 64-bit XID at first read. + */ +void +convert_page(Relation rel, Page page, Buffer buf, BlockNumber blkno) +{ + static unsigned logcnt = 0; + bool logit; + PageHeader hdr = (PageHeader) page; + GenericXLogState *state = NULL; + uint16 checksum; + bool try_double_xmax; + + /* Not during XLog replaying */ + Assert(rel != NULL); + + /* Verify checksum */ + if (hdr->pd_checksum) + { + checksum = pg_checksum_page((char *) page, blkno); + if (checksum != hdr->pd_checksum) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("page verification failed, calculated checksum %u but expected %u", + checksum, hdr->pd_checksum))); + } + + /* + * We occasionally force logging of page conversion, so never-changed + * pages are converted in the end. 
FORCE_LOG_EVERY is chosen arbitrarily + * to log neither too much nor too little. + */ +#define FORCE_LOG_EVERY 128 + logit = !RecoveryInProgress() && XLogIsNeeded() && RelationNeedsWAL(rel); + logit = logit && (++logcnt % FORCE_LOG_EVERY) == 0; + if (logit) + { + state = GenericXLogStart(rel); + page = GenericXLogRegisterBuffer(state, buf, + GENERIC_XLOG_FULL_IMAGE); + hdr = (PageHeader) page; + } + + /* Not already converted */ + Assert(PageGetPageLayoutVersion(page) != PG_PAGE_LAYOUT_VERSION); + + switch (rel->rd_rel->relkind) + { + case 'r': + case 'p': + case 't': + case 'm': + try_double_xmax = hdr->pd_upper - hdr->pd_lower < + MAXALIGN(sizeof(HeapPageSpecialData)); + repack_heap_tuples(rel, page, buf, blkno, try_double_xmax); + break; + case 'i': + /* no need to convert index */ + case 'S': + /* no real need to convert sequences */ + break; + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("conversion for relation \"%s\" cannot be done", + RelationGetRelationName(rel)), + errdetail_relkind_not_supported(rel->rd_rel->relkind))); + } + + hdr->pd_checksum = pg_checksum_page((char *) page, blkno); + + PageSetPageSizeAndVersion((hdr), PageGetPageSize(hdr), + PG_PAGE_LAYOUT_VERSION); + + if (logit) + { + /* + * Finish logging buffer conversion and mark buffer as dirty. + */ + Assert(state != NULL); + MarkBufferDirty(buf); + GenericXLogFinish(state); + } + else + { + /* + * Otherwise, it will be logged with full-page-write record on first + * actual change. + */ + MarkBufferConverted(buf, true); + } +} + +/* + * Convert xmin and xmax in a tuple. + * This also considers special cases: "double xmax" page format and multixact + * in xmax. 
+ */ +static void +convert_heap_tuple_xids(HeapTupleHeader tuple, TransactionId xid_base, + MultiXactId multi_base, bool double_xmax) +{ + /* Convert xmin */ + if (double_xmax) + { + /* Prepare tuple for "double xmax" page format */ + tuple->t_infomask |= HEAP_XMIN_FROZEN; + tuple->t_choice.t_heap.t_xmin = 0; + } + else + { + TransactionId xmin = tuple->t_choice.t_heap.t_xmin; + + if (TransactionIdIsNormal(xmin)) + { + if (HeapTupleHeaderXminFrozen(tuple)) + tuple->t_choice.t_heap.t_xmin = FrozenTransactionId; + else if (HeapTupleHeaderXminInvalid(tuple)) + tuple->t_choice.t_heap.t_xmin = InvalidTransactionId; + else + { + Assert(xmin >= xid_base + FirstNormalTransactionId); + /* Subtract xid_base from normal xmin */ + tuple->t_choice.t_heap.t_xmin = xmin - xid_base; + } + } + } + + /* If tuple has multixact flag, handle mxid wraparound */ + if ((tuple->t_infomask & HEAP_XMAX_IS_MULTI) && + !(tuple->t_infomask & HEAP_XMAX_INVALID)) + { + MultiXactId mxid = tuple->t_choice.t_heap.t_xmax; + + /* Handle mxid wraparound */ + if (mxid < multi_base) + { + mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId; + Assert(mxid >= multi_base); + } + + if (double_xmax) + { + /* Save converted mxid into "double xmax" format */ + HeapTupleHeaderSetDoubleXmax(tuple, mxid); + } + else + { + /* + * Save converted mxid offset relative to (minmxid - 1), which + * will be page's mxid base. 
+ */ + Assert(mxid - multi_base + FirstMultiXactId <= PG_UINT32_MAX); + tuple->t_choice.t_heap.t_xmax = + (uint32) (mxid - multi_base + FirstMultiXactId); + } + } + /* Convert xmax */ + else if (!(tuple->t_infomask & HEAP_XMAX_INVALID)) + { + TransactionId xmax = tuple->t_choice.t_heap.t_xmax; + + if (double_xmax) + { + /* Save converted xmax into "double xmax" format */ + HeapTupleHeaderSetDoubleXmax(tuple, xmax); + } + else if (TransactionIdIsNormal(xmax)) + { + /* Subtract xid_base from normal xmax */ + Assert(xmax >= xid_base + FirstNormalTransactionId); + tuple->t_choice.t_heap.t_xmax = xmax - xid_base; + } + } + else + { + if (double_xmax) + HeapTupleHeaderSetDoubleXmax(tuple, InvalidTransactionId); + else + tuple->t_choice.t_heap.t_xmax = InvalidTransactionId; + } +} + +/* + * Correct page xmin/xmax based on tuple xmin/xmax values. + */ +static void +compute_xid_min_max(HeapTuple tuple, MultiXactId multi_base, + TransactionId *xid_min, TransactionId *xid_max, + MultiXactId *multi_min, MultiXactId *multi_max) +{ + /* xmin */ + if (!HeapTupleHeaderXminInvalid(tuple->t_data) && + !HeapTupleHeaderXminFrozen(tuple->t_data)) + { + TransactionId xid = HeapTupleGetRawXmin(tuple); + + if (TransactionIdIsNormal(xid)) + { + *xid_max = Max(*xid_max, xid); + *xid_min = Min(*xid_min, xid); + } + } + + /* xmax */ + if (!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID)) + { + TransactionId xid; + + if (tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) + { + MultiXactId mxid = HeapTupleGetRawXmax(tuple); + + Assert(MultiXactIdIsValid(mxid)); + + /* Handle mxid wraparound */ + if (mxid < multi_base) + { + mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId; + Assert(mxid >= multi_base); + } + + *multi_max = Max(*multi_max, mxid); + *multi_min = Min(*multi_min, mxid); + + /* + * Also take into account hidden update xid, which can be + * extracted by the vacuum. 
+ */ + if (tuple->t_data->t_infomask & HEAP_XMAX_LOCK_ONLY) + xid = InvalidTransactionId; + else + xid = HeapTupleGetUpdateXid(tuple); + } + else + { + xid = HeapTupleGetRawXmax(tuple); + } + + if (TransactionIdIsNormal(xid)) + { + *xid_max = Max(*xid_max, xid); + *xid_min = Min(*xid_min, xid); + } + } +} + +/* + * Returns true if both: + * - xid_max: an upper boundary of xmin's and xmax'es of all tuples on a page + * - xid_min: a lower boundary of xmin's and xmax'es of all tuples on a page + * can be expressed by a 32-bit number relative to page's xid_base/multi_base + * or invalid. + * + * True value effectively means that these tuples can be directly put on one + * page in 64-xid format. + */ +static inline bool +xids_fit_page(TransactionId xid_min, TransactionId xid_max, + MultiXactId multi_min, MultiXactId multi_max) +{ + bool xid_max_fits = false; + bool multi_max_fits = false; + + if (xid_max == InvalidTransactionId) + xid_max_fits = true; + + if (xid_max - xid_min <= MaxShortTransactionId - FirstNormalTransactionId) + xid_max_fits = true; + + if (multi_max == InvalidMultiXactId) + multi_max_fits = true; + + if (multi_max - multi_min <= MaxShortTransactionId - FirstMultiXactId) + multi_max_fits = true; + + return xid_max_fits && multi_max_fits; +} + +/* + * Set "base" for page in 64-bit XID format. + * + * This should not be called for double xmax pages. They do not have room for + * the page special area. 
+ */ +static inline void +heap_page_set_base(Page page, + TransactionId xid_min, TransactionId xid_max, + MultiXactId multi_min, MultiXactId multi_max, + TransactionId *xid_base, MultiXactId *multi_base) +{ + PageHeader hdr = (PageHeader) page; + HeapPageSpecial special; + + if (xid_max != InvalidTransactionId) + *xid_base = xid_min - FirstNormalTransactionId; + else + *xid_base = InvalidTransactionId; + + if (multi_max != InvalidMultiXactId) + *multi_base = multi_min - FirstMultiXactId; + else + *multi_base = InvalidMultiXactId; + + hdr->pd_special = BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData)); + special = HeapPageGetSpecial(page); + special->pd_xid_base = *xid_base; + special->pd_multi_base = *multi_base; +} + +/* + * repack_heap_tuples + * Convert heap page format reusing space of dead tuples + */ +static void +repack_heap_tuples(Relation rel, Page page, Buffer buf, BlockNumber blkno, + bool try_double_xmax) +{ + ItemIdCompactData items[MaxHeapTuplesPerPage]; + ItemIdCompact itemPtr = items; + int nitems = 0, + maxoff = PageGetMaxOffsetNumber(page), + idx, + occupied_space = 0; + Offset upper; + bool double_xmax, + special_fits; + PageHeader hdr = (PageHeader) page, + new_hdr; + char new_page[BLCKSZ] = {0}; + MultiXactId multi_base = rel->rd_rel->relminmxid, + multi_min = MaxMultiXactId, + multi_max = InvalidMultiXactId; + TransactionId xid_base = rel->rd_rel->relfrozenxid, + xid_min = MaxTransactionId, + xid_max = InvalidTransactionId; + + if (TransactionIdIsNormal(hdr->pd_prune_xid)) + xid_min = xid_max = hdr->pd_prune_xid; + + for (idx = 0; idx < maxoff; idx++) + { + HeapTupleData tuple; + ItemId lp; + + lp = PageGetItemId(page, idx + 1); + + /* Skip redirects and items without storage */ + if (!ItemIdHasStorage(lp)) + continue; + + /* Build in-memory tuple representation */ + tuple.t_tableOid = 1; /* doesn't matter in this case */ + tuple.t_data = (HeapTupleHeader) PageGetItem(page, lp); + HeapTupleCopyHeaderXids(&tuple); + tuple.t_len = 
ItemIdGetLength(lp); + ItemPointerSet(&(tuple.t_self), blkno, ItemIdGetOffset(lp)); + + /* + * This is only needed to determine whether tuple is HEAPTUPLE_DEAD or + * HEAPTUPLE_RECENTLY_DEAD. And since this is the first time we read + * page after pg_upgrade, it cannot be HEAPTUPLE_RECENTLY_DEAD. See + * HeapTupleSatisfiesVacuum() for details + */ + if (try_double_xmax && + HeapTupleSatisfiesVacuum(&tuple, + (TransactionId) 1 << 32, buf) == HEAPTUPLE_DEAD) + { + ItemIdSetDead(lp); + } + + if (ItemIdIsNormal(lp) && ItemIdHasStorage(lp)) + { + itemPtr->offsetindex = idx; + itemPtr->itemoff = ItemIdGetOffset(lp); + if (unlikely(itemPtr->itemoff < hdr->pd_upper || + itemPtr->itemoff >= hdr->pd_special)) + { + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("corrupted item pointer: %u", + itemPtr->itemoff))); + } + + itemPtr->alignedlen = MAXALIGN(ItemIdGetLength(lp)); + occupied_space += itemPtr->alignedlen; + nitems++; + itemPtr++; + if (try_double_xmax) + { + HeapTupleSetXmin(&tuple, FrozenTransactionId); + HeapTupleHeaderSetXminFrozen(tuple.t_data); + } + + compute_xid_min_max(&tuple, multi_base, + &xid_min, &xid_max, + &multi_min, &multi_max); + } + } + + /* Write new header */ + new_hdr = (PageHeader) new_page; + *new_hdr = *hdr; + new_hdr->pd_lower = SizeOfPageHeaderData + maxoff * sizeof(ItemIdData); + + /* Page in 32-bit xid format should not have PageSpecial. 
*/ + Assert(PageGetSpecialSize(new_hdr) == 0); + + special_fits = BLCKSZ - new_hdr->pd_lower - occupied_space >= + sizeof(HeapPageSpecialData); + double_xmax = !special_fits || + !xids_fit_page(xid_min, xid_max, multi_min, multi_max); + + if (!double_xmax) + { + Assert(xid_max == InvalidTransactionId || xid_max >= xid_min); + Assert(multi_max == InvalidMultiXactId || multi_max >= multi_min); + + heap_page_set_base(new_page, + xid_min, xid_max, + multi_min, multi_max, + &xid_base, &multi_base); + + HeapPageSetPruneXid(new_page, new_hdr->pd_prune_xid); + } + else + { + /* No space for special area, switch to "double xmax" format */ + elog(DEBUG2, "convert heap page %u of relation \"%s\" to double xmax format", + blkno, RelationGetRelationName(rel)); + + if (try_double_xmax) + { + xid_base = InvalidTransactionId; + multi_base = InvalidMultiXactId; + } + else + { + repack_heap_tuples(rel, page, buf, blkno, true); + return; + } + } + + /* Copy ItemIds with an offset */ + memcpy((char *) new_page + SizeOfPageHeaderData, + (char *) page + SizeOfPageHeaderData, + hdr->pd_lower - SizeOfPageHeaderData); + + /* Move live tuples */ + upper = new_hdr->pd_special; + for (idx = 0; idx < nitems; idx++) + { + HeapTupleHeader tuple; + ItemId lp; + + itemPtr = &items[idx]; + lp = PageGetItemId(new_page, itemPtr->offsetindex + 1); + upper -= itemPtr->alignedlen; + occupied_space -= itemPtr->alignedlen; + + memcpy((char *) new_page + upper, + (char *) page + itemPtr->itemoff, + itemPtr->alignedlen); + + tuple = (HeapTupleHeader) (((char *) new_page) + upper); + + convert_heap_tuple_xids(tuple, xid_base, multi_base, double_xmax); + + lp->lp_off = upper; + } + + Assert(occupied_space == 0); + + new_hdr->pd_upper = upper; + if (new_hdr->pd_lower > new_hdr->pd_upper) + elog(ERROR, "cannot convert block %u of relation \"%s\"", + blkno, RelationGetRelationName(rel)); + + memcpy(page, new_page, BLCKSZ); +} diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c 
index e3dfb135de6..81a0e4be2c4 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -66,7 +66,7 @@ #include "utils/rel.h" #include "utils/snapmgr.h" -#define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var)))) +#define UINT64_ACCESS_ONCE(var) ((uint64)(*((volatile uint64 *)&(var)))) /* Our shared memory area */ typedef struct ProcArrayStruct @@ -350,9 +350,6 @@ static inline void ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId l static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid); static void MaintainLatestCompletedXid(TransactionId latestXid); static void MaintainLatestCompletedXidRecovery(TransactionId latestXid); - -static inline FullTransactionId FullXidRelativeTo(FullTransactionId rel, - TransactionId xid); static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons); /* @@ -959,8 +956,7 @@ MaintainLatestCompletedXid(TransactionId latestXid) if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid)) { - ShmemVariableCache->latestCompletedXid = - FullXidRelativeTo(cur_latest, latestXid); + ShmemVariableCache->latestCompletedXid = FullTransactionIdFromXid(latestXid); } Assert(IsBootstrapProcessingMode() || @@ -974,7 +970,6 @@ static void MaintainLatestCompletedXidRecovery(TransactionId latestXid) { FullTransactionId cur_latest = ShmemVariableCache->latestCompletedXid; - FullTransactionId rel; Assert(AmStartupProcess() || !IsUnderPostmaster); Assert(LWLockHeldByMe(ProcArrayLock)); @@ -984,14 +979,12 @@ MaintainLatestCompletedXidRecovery(TransactionId latestXid) * latestCompletedXid to be initialized in recovery. But in recovery it's * safe to access nextXid without a lock for the startup process. 
*/ - rel = ShmemVariableCache->nextXid; Assert(FullTransactionIdIsValid(ShmemVariableCache->nextXid)); if (!FullTransactionIdIsValid(cur_latest) || TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid)) { - ShmemVariableCache->latestCompletedXid = - FullXidRelativeTo(rel, latestXid); + ShmemVariableCache->latestCompletedXid = FullTransactionIdFromXid(latestXid); } Assert(FullTransactionIdIsNormal(ShmemVariableCache->latestCompletedXid)); @@ -1465,7 +1458,7 @@ TransactionIdIsInProgress(TransactionId xid) continue; /* Fetch xid just once - see GetNewTransactionId */ - pxid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]); + pxid = UINT64_ACCESS_ONCE(other_xids[pgxactoff]); if (!TransactionIdIsValid(pxid)) continue; @@ -1497,7 +1490,7 @@ TransactionIdIsInProgress(TransactionId xid) for (j = pxids - 1; j >= 0; j--) { /* Fetch xid just once - see GetNewTransactionId */ - TransactionId cxid = UINT32_ACCESS_ONCE(proc->subxids.xids[j]); + TransactionId cxid = UINT64_ACCESS_ONCE(proc->subxids.xids[j]); if (TransactionIdEquals(cxid, xid)) { @@ -1621,7 +1614,7 @@ TransactionIdIsActive(TransactionId xid) TransactionId pxid; /* Fetch xid just once - see GetNewTransactionId */ - pxid = UINT32_ACCESS_ONCE(other_xids[i]); + pxid = UINT64_ACCESS_ONCE(other_xids[i]); if (!TransactionIdIsValid(pxid)) continue; @@ -1768,8 +1761,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) TransactionId xmin; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); - xmin = UINT32_ACCESS_ONCE(proc->xmin); + xid = UINT64_ACCESS_ONCE(other_xids[index]); + xmin = UINT64_ACCESS_ONCE(proc->xmin); /* * Consider both the transaction's Xmin, and its Xid. 
@@ -2295,7 +2288,7 @@ GetSnapshotData(Snapshot snapshot) for (int pgxactoff = 0; pgxactoff < numProcs; pgxactoff++) { /* Fetch xid just once - see GetNewTransactionId */ - TransactionId xid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]); + TransactionId xid = UINT64_ACCESS_ONCE(other_xids[pgxactoff]); uint8 statusFlags; Assert(allProcs[arrayP->pgprocnos[pgxactoff]].pgxactoff == pgxactoff); @@ -2445,12 +2438,7 @@ GetSnapshotData(Snapshot snapshot) FullTransactionId def_vis_fxid_data; FullTransactionId oldestfxid; - /* - * Converting oldestXid is only safe when xid horizon cannot advance, - * i.e. holding locks. While we don't hold the lock anymore, all the - * necessary data has been gathered with lock held. - */ - oldestfxid = FullXidRelativeTo(latest_completed, oldestxid); + oldestfxid = FullTransactionIdFromXid(oldestxid); /* apply vacuum_defer_cleanup_age */ def_vis_xid_data = @@ -2473,8 +2461,8 @@ GetSnapshotData(Snapshot snapshot) def_vis_xid = TransactionIdOlder(replication_slot_catalog_xmin, def_vis_xid); - def_vis_fxid = FullXidRelativeTo(latest_completed, def_vis_xid); - def_vis_fxid_data = FullXidRelativeTo(latest_completed, def_vis_xid_data); + def_vis_fxid = FullTransactionIdFromXid(def_vis_xid); + def_vis_fxid_data = FullTransactionIdFromXid(def_vis_xid_data); /* * Check if we can increase upper bound. As a previous @@ -2493,7 +2481,7 @@ GetSnapshotData(Snapshot snapshot) /* See temp_oldest_nonremovable computation in ComputeXidHorizons() */ if (TransactionIdIsNormal(myxid)) GlobalVisTempRels.definitely_needed = - FullXidRelativeTo(latest_completed, myxid); + FullTransactionIdFromXid(myxid); else { GlobalVisTempRels.definitely_needed = latest_completed; @@ -2600,7 +2588,7 @@ ProcArrayInstallImportedXmin(TransactionId xmin, /* * Likewise, let's just make real sure its xmin does cover us. 
*/ - xid = UINT32_ACCESS_ONCE(proc->xmin); + xid = UINT64_ACCESS_ONCE(proc->xmin); if (!TransactionIdIsNormal(xid) || !TransactionIdPrecedesOrEquals(xid, xmin)) continue; @@ -2655,7 +2643,7 @@ ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc) * can't go backwards. Also, make sure it's running in the same database, * so that the per-database xmin cannot go backwards. */ - xid = UINT32_ACCESS_ONCE(proc->xmin); + xid = UINT64_ACCESS_ONCE(proc->xmin); if (proc->databaseId == MyDatabaseId && TransactionIdIsNormal(xid) && TransactionIdPrecedesOrEquals(xid, xmin)) @@ -2774,7 +2762,7 @@ GetRunningTransactionData(void) TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); + xid = UINT64_ACCESS_ONCE(other_xids[index]); /* * We don't need to store transactions that don't have a TransactionId @@ -2913,7 +2901,7 @@ GetOldestActiveTransactionId(void) TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); + xid = UINT64_ACCESS_ONCE(other_xids[index]); if (!TransactionIdIsNormal(xid)) continue; @@ -3011,7 +2999,7 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly) TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); + xid = UINT64_ACCESS_ONCE(other_xids[index]); if (!TransactionIdIsNormal(xid)) continue; @@ -3300,7 +3288,7 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, if (allDbs || proc->databaseId == MyDatabaseId) { /* Fetch xmin just once - might change on us */ - TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin); + TransactionId pxmin = UINT64_ACCESS_ONCE(proc->xmin); if (excludeXmin0 && !TransactionIdIsValid(pxmin)) continue; @@ -3395,7 +3383,7 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid) proc->databaseId == dbOid) { /* Fetch xmin just once - can't change on us, but good coding */ - TransactionId pxmin = 
UINT32_ACCESS_ONCE(proc->xmin); + TransactionId pxmin = UINT64_ACCESS_ONCE(proc->xmin); /* * We ignore an invalid pxmin because this means that backend has @@ -4102,17 +4090,13 @@ static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons) { GlobalVisSharedRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->shared_oldest_nonremovable); + FullTransactionIdFromXid(horizons->shared_oldest_nonremovable); GlobalVisCatalogRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->catalog_oldest_nonremovable); + FullTransactionIdFromXid(horizons->catalog_oldest_nonremovable); GlobalVisDataRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->data_oldest_nonremovable); + FullTransactionIdFromXid(horizons->data_oldest_nonremovable); GlobalVisTempRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->temp_oldest_nonremovable); + FullTransactionIdFromXid(horizons->temp_oldest_nonremovable); /* * In longer running transactions it's possible that transactions we @@ -4201,15 +4185,7 @@ GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid) { FullTransactionId fxid; - /* - * Convert 32 bit argument to FullTransactionId. We can do so safely - * because we know the xid has to, at the very least, be between - * [oldestXid, nextFullXid), i.e. within 2 billion of xid. To avoid taking - * a lock to determine either, we can just compare with - * state->definitely_needed, which was based on those value at the time - * the current snapshot was built. 
- */ - fxid = FullXidRelativeTo(state->definitely_needed, xid); + fxid = FullTransactionIdFromXid(xid); return GlobalVisTestIsRemovableFullXid(state, fxid); } @@ -4272,32 +4248,6 @@ GlobalVisCheckRemovableXid(Relation rel, TransactionId xid) return GlobalVisTestIsRemovableXid(state, xid); } -/* - * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it - * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel). - * - * Be very careful about when to use this function. It can only safely be used - * when there is a guarantee that xid is within MaxTransactionId / 2 xids of - * rel. That e.g. can be guaranteed if the caller assures a snapshot is - * held by the backend and xid is from a table (where vacuum/freezing ensures - * the xid has to be within that range), or if xid is from the procarray and - * prevents xid wraparound that way. - */ -static inline FullTransactionId -FullXidRelativeTo(FullTransactionId rel, TransactionId xid) -{ - TransactionId rel_xid = XidFromFullTransactionId(rel); - - Assert(TransactionIdIsValid(xid)); - Assert(TransactionIdIsValid(rel_xid)); - - /* not guaranteed to find issues, but likely to catch mistakes */ - AssertTransactionIdInAllowableRange(xid); - - return FullTransactionIdFromU64(U64FromFullTransactionId(rel) - + (int32) (xid - rel_xid)); -} - /* ---------------------------------------------- * KnownAssignedTransactionIds sub-module diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 65c9931a56a..4c2dd7dfe2a 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -37,7 +37,7 @@ #include "utils/timestamp.h" /* User-settable GUC parameters */ -int vacuum_defer_cleanup_age; +int64 vacuum_defer_cleanup_age; int max_standby_archive_delay = 30 * 1000; int max_standby_streaming_delay = 30 * 1000; bool log_recovery_conflict_waits = false; @@ -486,8 +486,8 @@ ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId latestRemovedFullXi 
FullTransactionId nextXid = ReadNextFullTransactionId(); uint64 diff; - diff = U64FromFullTransactionId(nextXid) - - U64FromFullTransactionId(latestRemovedFullXid); + diff = XidFromFullTransactionId(nextXid) - + XidFromFullTransactionId(latestRemovedFullXid); if (diff < MaxTransactionId / 2) { TransactionId latestRemovedXid; diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 1543da61620..c7b21793601 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -1163,10 +1163,18 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag) tag->locktag_field1); break; case LOCKTAG_TRANSACTION: - appendStringInfo(buf, - _("transaction %u"), - tag->locktag_field1); - break; + { + char xid_str[32]; + + /* make translatable string */ + snprintf(xid_str, sizeof(xid_str), "%llu", + (unsigned long long) + (TransactionId) tag->locktag_field1 | + ((TransactionId) tag->locktag_field2 << 32)); + + appendStringInfo(buf, _("transaction %s"), xid_str); + break; + } case LOCKTAG_VIRTUALTRANSACTION: appendStringInfo(buf, _("virtual transaction %d/%u"), diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index f0e4ac88987..36577fd6ba2 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -333,9 +333,9 @@ static SlruCtlData SerialSlruCtlData; #define SerialValue(slotno, xid) (*((SerCommitSeqNo *) \ (SerialSlruCtl->shared->page_buffer[slotno] + \ - ((((uint32) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE)))) + ((((uint64) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE)))) -#define SerialPage(xid) (((uint32) (xid)) / SERIAL_ENTRIESPERPAGE) +#define SerialPage(xid) ((int64) (((uint64) (xid)) / SERIAL_ENTRIESPERPAGE)) typedef struct SerialControlData { diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index a3d367db511..db74be0bfd8 100644 --- a/src/backend/storage/page/bufpage.c +++ 
b/src/backend/storage/page/bufpage.c @@ -21,11 +21,26 @@ #include "storage/checksum.h" #include "utils/memdebug.h" #include "utils/memutils.h" +#include "utils/snapmgr.h" /* GUC variable */ bool ignore_checksum_failure = false; +/* + * HeapPageSpecialData used when pd_special == BLCKSZ. This is a special format + * used when a page with 32-bit xids has no room to fit HeapPageSpecialData. Then + * all xmins are frozen (we can do this for all live tuples after pg_upgrade), + * while the 64-bit xmax is stored in both t_heap.t_xmin and t_heap.t_xmax. + * This is the so-called "double xmax" format. + */ +static HeapPageSpecialData doubleXmaxSpecialData = +{ + .pd_xid_base = MaxTransactionId, + .pd_multi_base = MaxTransactionId +}; +HeapPageSpecial doubleXmaxSpecial = &doubleXmaxSpecialData; + /* ---------------------------------------------------------------- * Page support functions @@ -432,15 +447,131 @@ PageRestoreTempPage(Page tempPage, Page oldPage) } /* - * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete + * Get minimum and maximum values of xid and multixact on "double xmax" page.
*/ -typedef struct itemIdCompactData +static void +heap_page_double_xmax_get_min_max(Page page, + TransactionId *xid_min, + TransactionId *xid_max, + MultiXactId *multi_min, + MultiXactId *multi_max) { - uint16 offsetindex; /* linp array index */ - int16 itemoff; /* page offset of item data */ - uint16 alignedlen; /* MAXALIGN(item data len) */ -} itemIdCompactData; -typedef itemIdCompactData *itemIdCompact; + bool xid_found = false, + multi_found = false; + OffsetNumber offnum, + maxoff; + + maxoff = PageGetMaxOffsetNumber(page); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + HeapTupleHeader htup; + TransactionId xmax; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + xmax = HeapTupleHeaderGetDoubleXmax(htup); + + if (!TransactionIdIsNormal(xmax)) + continue; + + if (!(htup->t_infomask & HEAP_XMAX_IS_MULTI)) + { + if (!xid_found) + { + *xid_min = *xid_max = xmax; + xid_found = true; + } + else + { + *xid_min = Min(*xid_min, xmax); + *xid_max = Max(*xid_max, xmax); + } + } + else + { + if (!multi_found) + { + *multi_min = *multi_max = xmax; + multi_found = true; + } + else + { + *multi_min = Min(*multi_min, xmax); + *multi_max = Max(*multi_max, xmax); + } + } + } +} + +/* + * Add special area to heap page, so convert from "double xmax" to normal + * format. 
+ */ +static void +heap_page_add_special_area(ItemIdCompact itemidbase, int nitems, Page page, + TransactionId xid_base, MultiXactId multi_base) +{ + char newPage[BLCKSZ]; + PageHeader phdr = (PageHeader) page; + PageHeader new_phdr = (PageHeader) newPage; + HeapPageSpecial special; + Offset upper; + int i; + + memcpy(newPage, page, phdr->pd_lower); + + /* Add special area */ + new_phdr->pd_special = PageGetPageSize(newPage) - sizeof(HeapPageSpecialData); + special = (HeapPageSpecial) ((Pointer) (newPage) + new_phdr->pd_special); + special->pd_xid_base = xid_base; + special->pd_multi_base = multi_base; + + /* sort the ItemIdCompactData array into decreasing itemoff order */ + qsort((char *) itemidbase, nitems, sizeof(ItemIdCompactData), + itemoffcompare); + + upper = new_phdr->pd_special; + for (i = 0; i < nitems; i++) + { + ItemIdCompact itemidptr = &itemidbase[i]; + ItemId lp; + HeapTupleHeader old_htup; + HeapTupleHeader new_htup; + TransactionId xmax; + + lp = PageGetItemId(page, itemidptr->offsetindex + 1); + old_htup = (HeapTupleHeader) PageGetItem(page, lp); + upper -= itemidptr->alignedlen; + memcpy((Pointer) newPage + upper, + (Pointer) page + itemidptr->itemoff, + itemidptr->alignedlen); + lp = PageGetItemId(newPage, itemidptr->offsetindex + 1); + lp->lp_off = upper; + new_htup = (HeapTupleHeader) PageGetItem(newPage, lp); + + /* Convert xmax value */ + new_htup->t_choice.t_heap.t_xmin = FrozenTransactionId; + xmax = HeapTupleHeaderGetDoubleXmax(old_htup); + if (!(new_htup->t_infomask & HEAP_XMAX_IS_MULTI)) + new_htup->t_choice.t_heap.t_xmax = NormalTransactionIdToShort(xid_base, xmax); + else + new_htup->t_choice.t_heap.t_xmax = NormalTransactionIdToShort(multi_base, xmax); + } + + new_phdr->pd_upper = upper; + + memcpy(page, newPage, PageGetPageSize(newPage)); + elog(DEBUG2, "convert heap page from double xmax to normal format"); +} /* * After removing or marking some line pointers unused, move the tuples to @@ -471,21 +602,46 @@ typedef itemIdCompactData
*itemIdCompact; * Callers must ensure that nitems is > 0 */ static void -compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted) +compactify_tuples(ItemIdCompact itemidbase, int nitems, Page page, + bool presorted, bool addspecial) { PageHeader phdr = (PageHeader) page; Offset upper; Offset copy_tail; Offset copy_head; - itemIdCompact itemidptr; + ItemIdCompact itemidptr; int i; /* Code within will not work correctly if nitems == 0 */ Assert(nitems > 0); - if (presorted) + /* Add special area to the heap page if possible */ + if (addspecial) { + TransactionId xid_min = FirstNormalTransactionId, + xid_max = FirstNormalTransactionId; + MultiXactId multi_min = FirstNormalTransactionId, + multi_max = FirstNormalTransactionId; + + Assert(phdr->pd_special == PageGetPageSize(page)); + heap_page_double_xmax_get_min_max(page, &xid_min, &xid_max, + &multi_min, &multi_max); + + if (xid_max - xid_min < (TransactionId) (MaxShortTransactionId - FirstNormalTransactionId) && + multi_max - multi_min < (TransactionId) (MaxShortTransactionId - FirstNormalTransactionId)) + { + Assert(xid_min >= FirstNormalTransactionId); + Assert(multi_min >= FirstNormalTransactionId); + heap_page_add_special_area(itemidbase, nitems, page, + xid_min - FirstNormalTransactionId, + multi_min - FirstNormalTransactionId); + return; + } + } + + if (presorted) + { #ifdef USE_ASSERT_CHECKING { /* @@ -702,8 +858,8 @@ PageRepairFragmentation(Page page) Offset pd_upper = ((PageHeader) page)->pd_upper; Offset pd_special = ((PageHeader) page)->pd_special; Offset last_offset; - itemIdCompactData itemidbase[MaxHeapTuplesPerPage]; - itemIdCompact itemidptr; + ItemIdCompactData itemidbase[MaxHeapTuplesPerPage]; + ItemIdCompact itemidptr; ItemId lp; int nline, nstorage, @@ -777,11 +933,21 @@ PageRepairFragmentation(Page page) nstorage = itemidptr - itemidbase; if (nstorage == 0) { + if (pd_special == PageGetPageSize(page)) + { + pd_special = PageGetPageSize(page) - 
sizeof(HeapPageSpecialData); + ((PageHeader) page)->pd_special = pd_special; + HeapPageGetSpecial(page)->pd_xid_base = 0; + HeapPageGetSpecial(page)->pd_multi_base = 0; + } + /* Page is completely empty, so just reset it quickly */ ((PageHeader) page)->pd_upper = pd_special; } else { + bool addspecial = false; + /* Need to compact the page the hard way */ if (totallen > (Size) (pd_special - pd_lower)) ereport(ERROR, @@ -789,7 +955,15 @@ PageRepairFragmentation(Page page) errmsg("corrupted item lengths: total %u, available space %u", (unsigned int) totallen, pd_special - pd_lower))); - compactify_tuples(itemidbase, nstorage, page, presorted); + /* + * Try to add a special area to the heap page if it has enough free + * space. + */ + if (pd_special == PageGetPageSize(page) && + (Size) (pd_special - pd_lower) - totallen >= sizeof(HeapPageSpecialData)) + addspecial = true; + + compactify_tuples(itemidbase, nstorage, page, presorted, addspecial); if (finalusedlp != nline) @@ -992,6 +1166,9 @@ PageGetHeapFreeSpace(Page page) { Size space; + if (HeapPageIsDoubleXmax(page)) + return 0; + space = PageGetFreeSpace(page); if (space > 0) { @@ -1165,9 +1342,9 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems) Offset pd_upper = phdr->pd_upper; Offset pd_special = phdr->pd_special; Offset last_offset; - itemIdCompactData itemidbase[MaxIndexTuplesPerPage]; + ItemIdCompactData itemidbase[MaxIndexTuplesPerPage]; ItemIdData newitemids[MaxIndexTuplesPerPage]; - itemIdCompact itemidptr; + ItemIdCompact itemidptr; ItemId lp; int nline, nused; @@ -1275,7 +1452,7 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems) /* and compactify the tuple data */ if (nused > 0) - compactify_tuples(itemidbase, nused, page, presorted); + compactify_tuples(itemidbase, nused, page, presorted, false); else phdr->pd_upper = pd_special; } diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 8bfb46eb194..f8c0de939c1 100644 ---
a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -3810,15 +3810,13 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx, case 'm': { - unsigned long value; - char *endptr; + char *endptr; errno = 0; - value = strtoul(optarg, &endptr, 0); - start_mxid = value; + start_mxid = strtoull(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0 || - value != start_mxid) /* overflow */ + !StartMultiXactIdIsValid(start_mxid)) { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -3841,15 +3839,13 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx, case 'o': { - unsigned long value; - char *endptr; + char *endptr; errno = 0; - value = strtoul(optarg, &endptr, 0); - start_mxoff = value; + start_mxoff = strtoull(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0 || - value != start_mxoff) /* overflow */ + !StartMultiXactOffsetIsValid(start_mxoff)) { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -3914,15 +3910,13 @@ process_postgres_switches(int argc, char *argv[], GucContext ctx, case 'x': { - unsigned long value; - char *endptr; + char *endptr; errno = 0; - value = strtoul(optarg, &endptr, 0); - start_xid = value; + start_xid = strtoull(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0 || - value != start_xid) /* overflow */ + !StartTransactionIdIsValid(start_xid)) { ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), @@ -4087,7 +4081,6 @@ PostgresSingleUserMain(int argc, char *argv[], PostgresMain(dbname, username); } - /* ---------------------------------------------------------------- * PostgresMain * postgres main loop -- all backends, interactive or otherwise loop here diff --git a/src/backend/utils/adt/enum.c b/src/backend/utils/adt/enum.c index 0cc7a6d8ad0..04ca6ff16e7 100644 --- a/src/backend/utils/adt/enum.c +++ b/src/backend/utils/adt/enum.c @@ -76,7 +76,7 @@ check_safe_enum_use(HeapTuple enumval_tup) * Usually, a row would get hinted as committed when 
it's read or loaded * into syscache; but just in case not, let's check the xmin directly. */ - xmin = HeapTupleHeaderGetXmin(enumval_tup->t_data); + xmin = HeapTupleGetXmin(enumval_tup); if (!TransactionIdIsInProgress(xmin) && TransactionIdDidCommit(xmin)) return; diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index d427bdfbe0d..0b77b4a38d0 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -3708,6 +3708,7 @@ populate_recordset_record(PopulateRecordsetState *state, JsObject *obj) tuple.t_len = HeapTupleHeaderGetDatumLength(tuphead); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple); tuple.t_data = tuphead; tuplestore_puttuple(state->tuple_store, &tuple); diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index 359cde87720..9bff14efaa6 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -77,7 +77,7 @@ VXIDGetDatum(BackendId bid, LocalTransactionId lxid) * The representation is "/", decimal and unsigned decimal * respectively. Note that elog.c also knows how to format a vxid. 
*/ - char vxidstr[32]; + char vxidstr[64]; snprintf(vxidstr, sizeof(vxidstr), "%d/%llu", bid, (unsigned long long) lxid); @@ -293,7 +293,9 @@ pg_lock_status(PG_FUNCTION_ARGS) break; case LOCKTAG_TRANSACTION: values[6] = - TransactionIdGetDatum(instance->locktag.locktag_field1); + TransactionIdGetDatum( + (TransactionId) instance->locktag.locktag_field1 | + ((TransactionId) instance->locktag.locktag_field2 << 32)); nulls[1] = true; nulls[2] = true; nulls[3] = true; @@ -305,7 +307,8 @@ pg_lock_status(PG_FUNCTION_ARGS) break; case LOCKTAG_VIRTUALTRANSACTION: values[5] = VXIDGetDatum(instance->locktag.locktag_field1, - instance->locktag.locktag_field2); + (TransactionId) instance->locktag.locktag_field2 | + ((TransactionId) instance->locktag.locktag_field3 << 32)); nulls[1] = true; nulls[2] = true; nulls[3] = true; diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 893690dad52..41366cc87cf 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/xact.h" #include "access/xlog.h" #include "access/xlogprefetcher.h" #include "catalog/pg_authid.h" diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c index db843a0fbf0..5e08d02d645 100644 --- a/src/backend/utils/adt/rowtypes.c +++ b/src/backend/utils/adt/rowtypes.c @@ -327,6 +327,7 @@ record_out(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple); tuple.t_data = rec; /* @@ -694,6 +695,7 @@ record_send(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple); tuple.t_data = rec; /* @@ -844,10 +846,12 @@ record_cmp(FunctionCallInfo fcinfo) tuple1.t_len = HeapTupleHeaderGetDatumLength(record1); 
ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple1); tuple1.t_data = record1; tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tuple2); tuple2.t_data = record2; /* @@ -1089,10 +1093,12 @@ record_eq(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + HeapTupleSetZeroBase(&tuple1); tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroBase(&tuple2); /* * We arrange to look up the needed comparison info just once per series @@ -1351,10 +1357,12 @@ record_image_cmp(FunctionCallInfo fcinfo) ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + HeapTupleSetZeroBase(&tuple1); tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroBase(&tuple2); /* * We arrange to look up the needed comparison info just once per series @@ -1597,10 +1605,12 @@ record_image_eq(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + HeapTupleSetZeroBase(&tuple1); tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroBase(&tuple2); /* * We arrange to look up the needed comparison info just once per series @@ -1800,6 +1810,7 @@ hash_record(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = record; + HeapTupleSetZeroBase(&tuple); /* * We arrange to look up the needed hashing info just once per series of @@ -1921,6 +1932,7 @@ hash_record_extended(PG_FUNCTION_ARGS) 
ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = record; + HeapTupleSetZeroBase(&tuple); /* * We arrange to look up the needed hashing info just once per series of diff --git a/src/backend/utils/adt/xid.c b/src/backend/utils/adt/xid.c index e4b4952a281..056752cfaf2 100644 --- a/src/backend/utils/adt/xid.c +++ b/src/backend/utils/adt/xid.c @@ -32,16 +32,16 @@ xidin(PG_FUNCTION_ARGS) { char *str = PG_GETARG_CSTRING(0); - PG_RETURN_TRANSACTIONID((TransactionId) strtoul(str, NULL, 0)); + PG_RETURN_TRANSACTIONID((TransactionId) strtou64(str, NULL, 0)); } Datum xidout(PG_FUNCTION_ARGS) { TransactionId transactionId = PG_GETARG_TRANSACTIONID(0); - char *result = (char *) palloc(16); + char *result = (char *) palloc(32); - snprintf(result, 16, "%lu", (unsigned long) transactionId); + snprintf(result, 32, "%llu", (unsigned long long) transactionId); PG_RETURN_CSTRING(result); } @@ -52,8 +52,13 @@ Datum xidrecv(PG_FUNCTION_ARGS) { StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + uint32 lo, + hi; + + lo = (uint32) pq_getmsgint(buf, sizeof(TransactionId)); + hi = (uint32) pq_getmsgint(buf, sizeof(TransactionId)); - PG_RETURN_TRANSACTIONID((TransactionId) pq_getmsgint(buf, sizeof(TransactionId))); + PG_RETURN_TRANSACTIONID((uint64) lo + ((uint64) hi << 32)); } /* @@ -64,9 +69,15 @@ xidsend(PG_FUNCTION_ARGS) { TransactionId arg1 = PG_GETARG_TRANSACTIONID(0); StringInfoData buf; + uint32 lo, + hi; + + lo = (uint32) (arg1 & 0xFFFFFFFF); + hi = (uint32) (arg1 >> 32); pq_begintypsend(&buf); - pq_sendint32(&buf, arg1); + pq_sendint(&buf, lo, sizeof(lo)); + pq_sendint(&buf, hi, sizeof(hi)); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } @@ -105,9 +116,9 @@ xid_age(PG_FUNCTION_ARGS) /* Permanent XIDs are always infinitely old */ if (!TransactionIdIsNormal(xid)) - PG_RETURN_INT32(INT_MAX); + PG_RETURN_INT64(PG_INT8_MAX); - PG_RETURN_INT32((int32) (now - xid)); + PG_RETURN_INT64((int64) (now - xid)); } /* @@ -120,9 +131,9 @@ mxid_age(PG_FUNCTION_ARGS) 
MultiXactId now = ReadNextMultiXactId(); if (!MultiXactIdIsValid(xid)) - PG_RETURN_INT32(INT_MAX); + PG_RETURN_INT64(PG_INT8_MAX); - PG_RETURN_INT32((int32) (now - xid)); + PG_RETURN_INT64((int64) (now - xid)); } /* @@ -184,7 +195,7 @@ xid8in(PG_FUNCTION_ARGS) { char *str = PG_GETARG_CSTRING(0); - PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(strtou64(str, NULL, 0))); + PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromXid(strtou64(str, NULL, 0))); } Datum @@ -193,7 +204,7 @@ xid8out(PG_FUNCTION_ARGS) FullTransactionId fxid = PG_GETARG_FULLTRANSACTIONID(0); char *result = (char *) palloc(21); - snprintf(result, 21, UINT64_FORMAT, U64FromFullTransactionId(fxid)); + snprintf(result, 21, UINT64_FORMAT, XidFromFullTransactionId(fxid)); PG_RETURN_CSTRING(result); } @@ -204,7 +215,7 @@ xid8recv(PG_FUNCTION_ARGS) uint64 value; value = (uint64) pq_getmsgint64(buf); - PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(value)); + PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromXid(value)); } Datum @@ -214,7 +225,7 @@ xid8send(PG_FUNCTION_ARGS) StringInfoData buf; pq_begintypsend(&buf); - pq_sendint64(&buf, (uint64) U64FromFullTransactionId(arg1)); + pq_sendint64(&buf, (uint64) XidFromFullTransactionId(arg1)); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c index 6c57ec3d358..2a9ed527481 100644 --- a/src/backend/utils/adt/xid8funcs.c +++ b/src/backend/utils/adt/xid8funcs.c @@ -79,8 +79,7 @@ typedef struct * It is an ERROR if the xid is in the future. Otherwise, returns true if * the transaction is still new enough that we can determine whether it * committed and false otherwise. If *extracted_xid is not NULL, it is set - * to the low 32 bits of the transaction ID (i.e. the actual XID, without the - * epoch). + * to the actual transaction ID. 
* * The caller must hold XactTruncationLock since it's dealing with arbitrary * XIDs, and must continue to hold it until it's done with any clog lookups @@ -89,15 +88,10 @@ typedef struct static bool TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid) { - uint32 xid_epoch = EpochFromFullTransactionId(fxid); TransactionId xid = XidFromFullTransactionId(fxid); - uint32 now_epoch; - TransactionId now_epoch_next_xid; FullTransactionId now_fullxid; now_fullxid = ReadNextFullTransactionId(); - now_epoch_next_xid = XidFromFullTransactionId(now_fullxid); - now_epoch = EpochFromFullTransactionId(now_fullxid); if (extracted_xid != NULL) *extracted_xid = xid; @@ -114,7 +108,7 @@ TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("transaction ID %llu is in the future", - (unsigned long long) U64FromFullTransactionId(fxid)))); + (unsigned long long) XidFromFullTransactionId(fxid)))); /* * ShmemVariableCache->oldestClogXid is protected by XactTruncationLock, @@ -126,48 +120,15 @@ TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid) Assert(LWLockHeldByMe(XactTruncationLock)); /* - * If the transaction ID has wrapped around, it's definitely too old to - * determine the commit status. Otherwise, we can compare it to - * ShmemVariableCache->oldestClogXid to determine whether the relevant - * CLOG entry is guaranteed to still exist. + * We compare xid to ShmemVariableCache->oldestClogXid to determine + * whether the relevant CLOG entry is guaranteed to still exist. */ - if (xid_epoch + 1 < now_epoch - || (xid_epoch + 1 == now_epoch && xid < now_epoch_next_xid) - || TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid)) + if (TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid)) return false; return true; } -/* - * Convert a TransactionId obtained from a snapshot held by the caller to a - * FullTransactionId. 
Use next_fxid as a reference FullTransactionId, so that - * we can compute the high order bits. It must have been obtained by the - * caller with ReadNextFullTransactionId() after the snapshot was created. - */ -static FullTransactionId -widen_snapshot_xid(TransactionId xid, FullTransactionId next_fxid) -{ - TransactionId next_xid = XidFromFullTransactionId(next_fxid); - uint32 epoch = EpochFromFullTransactionId(next_fxid); - - /* Special transaction ID. */ - if (!TransactionIdIsNormal(xid)) - return FullTransactionIdFromEpochAndXid(0, xid); - - /* - * The 64 bit result must be <= next_fxid, since next_fxid hadn't been - * issued yet when the snapshot was created. Every TransactionId in the - * snapshot must therefore be from the same epoch as next_fxid, or the - * epoch before. We know this because next_fxid is never allow to get - * more than one epoch ahead of the TransactionIds in any snapshot. - */ - if (xid > next_xid) - epoch--; - - return FullTransactionIdFromEpochAndXid(epoch, xid); -} - /* * txid comparator for qsort/bsearch */ @@ -294,12 +255,12 @@ parse_snapshot(const char *str) char *endp; StringInfo buf; - xmin = FullTransactionIdFromU64(strtou64(str, &endp, 10)); + xmin = FullTransactionIdFromXid(strtou64(str, &endp, 10)); if (*endp != ':') goto bad_format; str = endp + 1; - xmax = FullTransactionIdFromU64(strtou64(str, &endp, 10)); + xmax = FullTransactionIdFromXid(strtou64(str, &endp, 10)); if (*endp != ':') goto bad_format; str = endp + 1; @@ -317,7 +278,7 @@ parse_snapshot(const char *str) while (*str != '\0') { /* read next value */ - val = FullTransactionIdFromU64(strtou64(str, &endp, 10)); + val = FullTransactionIdFromXid(strtou64(str, &endp, 10)); str = endp; /* require the input to be in order */ @@ -396,7 +357,6 @@ pg_current_snapshot(PG_FUNCTION_ARGS) uint32 nxip, i; Snapshot cur; - FullTransactionId next_fxid = ReadNextFullTransactionId(); cur = GetActiveSnapshot(); if (cur == NULL) @@ -414,11 +374,11 @@ 
pg_current_snapshot(PG_FUNCTION_ARGS) snap = palloc(PG_SNAPSHOT_SIZE(nxip)); /* fill */ - snap->xmin = widen_snapshot_xid(cur->xmin, next_fxid); - snap->xmax = widen_snapshot_xid(cur->xmax, next_fxid); + snap->xmin = FullTransactionIdFromXid(cur->xmin); + snap->xmax = FullTransactionIdFromXid(cur->xmax); snap->nxip = nxip; for (i = 0; i < nxip; i++) - snap->xip[i] = widen_snapshot_xid(cur->xip[i], next_fxid); + snap->xip[i] = FullTransactionIdFromXid(cur->xip[i]); /* * We want them guaranteed to be in ascending order. This also removes @@ -466,16 +426,16 @@ pg_snapshot_out(PG_FUNCTION_ARGS) initStringInfo(&str); appendStringInfo(&str, UINT64_FORMAT ":", - U64FromFullTransactionId(snap->xmin)); + XidFromFullTransactionId(snap->xmin)); appendStringInfo(&str, UINT64_FORMAT ":", - U64FromFullTransactionId(snap->xmax)); + XidFromFullTransactionId(snap->xmax)); for (i = 0; i < snap->nxip; i++) { if (i > 0) appendStringInfoChar(&str, ','); appendStringInfo(&str, UINT64_FORMAT, - U64FromFullTransactionId(snap->xip[i])); + XidFromFullTransactionId(snap->xip[i])); } PG_RETURN_CSTRING(str.data); @@ -504,8 +464,8 @@ pg_snapshot_recv(PG_FUNCTION_ARGS) if (nxip < 0 || nxip > PG_SNAPSHOT_MAX_NXIP) goto bad_format; - xmin = FullTransactionIdFromU64((uint64) pq_getmsgint64(buf)); - xmax = FullTransactionIdFromU64((uint64) pq_getmsgint64(buf)); + xmin = FullTransactionIdFromXid((uint64) pq_getmsgint64(buf)); + xmax = FullTransactionIdFromXid((uint64) pq_getmsgint64(buf)); if (!FullTransactionIdIsValid(xmin) || !FullTransactionIdIsValid(xmax) || FullTransactionIdPrecedes(xmax, xmin)) @@ -518,7 +478,7 @@ pg_snapshot_recv(PG_FUNCTION_ARGS) for (i = 0; i < nxip; i++) { FullTransactionId cur = - FullTransactionIdFromU64((uint64) pq_getmsgint64(buf)); + FullTransactionIdFromXid((uint64) pq_getmsgint64(buf)); if (FullTransactionIdPrecedes(cur, last) || FullTransactionIdPrecedes(cur, xmin) || @@ -563,10 +523,10 @@ pg_snapshot_send(PG_FUNCTION_ARGS) pq_begintypsend(&buf); pq_sendint32(&buf, 
snap->nxip); - pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xmin)); - pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xmax)); + pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xmin)); + pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xmax)); for (i = 0; i < snap->nxip; i++) - pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xip[i])); + pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xip[i])); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } @@ -654,8 +614,7 @@ pg_snapshot_xip(PG_FUNCTION_ARGS) * Report the status of a recent transaction ID, or null for wrapped, * truncated away or otherwise too old XIDs. * - * The passed epoch-qualified xid is treated as a normal xid, not a - * multixact id. + * The passed xid is treated as a normal xid, not a multixact id. * * If it points to a committed subxact the result is the subxact status even * though the parent xact may still be in progress or may have aborted. diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 6ae7c1f50b8..c42819ced41 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -1838,6 +1838,7 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments, memcpy((char *) ct->tuple.t_data, (const char *) dtp->t_data, dtp->t_len); + HeapTupleCopyBase(&ct->tuple, dtp); MemoryContextSwitchTo(oldcxt); if (dtp != ntp) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 60e72f9e8bf..2ca308923fc 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -2306,8 +2306,7 @@ RelationReloadIndexInfo(Relation relation) relation->rd_index->indislive = index->indislive; /* Copy xmin too, as that is needed to make sense of indcheckxmin */ - HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data, - HeapTupleHeaderGetXmin(tuple->t_data)); + HeapTupleSetXmin(relation->rd_indextuple, HeapTupleGetXmin(tuple)); 
ReleaseSysCache(tuple); } diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c index a9dd068095b..34fe6bea838 100644 --- a/src/backend/utils/fmgr/fmgr.c +++ b/src/backend/utils/fmgr/fmgr.c @@ -540,7 +540,7 @@ lookup_C_func(HeapTuple procedureTuple) NULL); if (entry == NULL) return NULL; /* no such entry */ - if (entry->fn_xmin == HeapTupleHeaderGetRawXmin(procedureTuple->t_data) && + if (entry->fn_xmin == HeapTupleGetRawXmin(procedureTuple) && ItemPointerEquals(&entry->fn_tid, &procedureTuple->t_self)) return entry; /* OK */ return NULL; /* entry is out of date */ @@ -576,7 +576,7 @@ record_C_func(HeapTuple procedureTuple, HASH_ENTER, &found); /* OID is already filled in */ - entry->fn_xmin = HeapTupleHeaderGetRawXmin(procedureTuple->t_data); + entry->fn_xmin = HeapTupleGetRawXmin(procedureTuple); entry->fn_tid = procedureTuple->t_self; entry->user_fn = user_fn; entry->inforec = inforec; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index b5de58716e2..0f41345f397 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2699,74 +2699,6 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, - { - {"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Minimum age at which VACUUM should freeze a table row."), - NULL - }, - &vacuum_freeze_min_age, - 50000000, 0, 1000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Age at which VACUUM should scan whole table to freeze tuples."), - NULL - }, - &vacuum_freeze_table_age, - 150000000, 0, 2000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_multixact_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Minimum age at which VACUUM should freeze a MultiXactId in a table row."), - NULL - }, - &vacuum_multixact_freeze_min_age, - 5000000, 0, 1000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_multixact_freeze_table_age", PGC_USERSET, 
CLIENT_CONN_STATEMENT, - gettext_noop("Multixact age at which VACUUM should scan whole table to freeze tuples."), - NULL - }, - &vacuum_multixact_freeze_table_age, - 150000000, 0, 2000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_defer_cleanup_age", PGC_SIGHUP, REPLICATION_PRIMARY, - gettext_noop("Number of transactions by which VACUUM and HOT cleanup should be deferred, if any."), - NULL - }, - &vacuum_defer_cleanup_age, - 0, 0, 1000000, /* see ComputeXidHorizons */ - NULL, NULL, NULL - }, - { - {"vacuum_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Age at which VACUUM should trigger failsafe to avoid a wraparound outage."), - NULL - }, - &vacuum_failsafe_age, - 1600000000, 0, 2100000000, - NULL, NULL, NULL - }, - { - {"vacuum_multixact_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT, - gettext_noop("Multixact age at which VACUUM should trigger failsafe to avoid a wraparound outage."), - NULL - }, - &vacuum_multixact_failsafe_age, - 1600000000, 0, 2100000000, - NULL, NULL, NULL - }, - /* * See also CheckRequiredParameterValues() if this parameter changes */ @@ -3377,28 +3309,6 @@ static struct config_int ConfigureNamesInt[] = 50, 0, INT_MAX, NULL, NULL, NULL }, - { - /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ - {"autovacuum_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, - gettext_noop("Age at which to autovacuum a table to prevent transaction ID wraparound."), - NULL - }, - &autovacuum_freeze_max_age, - - /* see vacuum_failsafe_age if you change the upper-limit value. 
*/ - 200000000, 100000, 2000000000, - NULL, NULL, NULL - }, - { - /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ - {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, - gettext_noop("Multixact age at which to autovacuum a table to prevent multixact wraparound."), - NULL - }, - &autovacuum_multixact_freeze_max_age, - 400000000, 10000, 2000000000, - NULL, NULL, NULL - }, { /* see max_connections */ {"autovacuum_max_workers", PGC_POSTMASTER, AUTOVACUUM, @@ -3957,6 +3867,100 @@ static struct config_real ConfigureNamesReal[] = static struct config_int64 ConfigureNamesInt64[] = { + { + {"vacuum_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Minimum age at which VACUUM should freeze a table row."), + NULL + }, + &vacuum_freeze_min_age, + INT64CONST(50000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Age at which VACUUM should scan whole table to freeze tuples."), + NULL + }, + &vacuum_freeze_table_age, + INT64CONST(150000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_multixact_freeze_min_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Minimum age at which VACUUM should freeze a MultiXactId in a table row."), + NULL + }, + &vacuum_multixact_freeze_min_age, + INT64CONST(5000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_multixact_freeze_table_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Multixact age at which VACUUM should scan whole table to freeze tuples."), + NULL + }, + &vacuum_multixact_freeze_table_age, + INT64CONST(150000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_defer_cleanup_age", PGC_SIGHUP, REPLICATION_PRIMARY, + gettext_noop("Number of transactions by which VACUUM and HOT cleanup should be deferred, if any."), + NULL + }, + 
&vacuum_defer_cleanup_age, + INT64CONST(0), INT64CONST(0), INT64CONST(1000000), + NULL, NULL, NULL + }, + + { + {"vacuum_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Age at which VACUUM should trigger failsafe to avoid a wraparound outage."), + NULL + }, + &vacuum_failsafe_age, + INT64CONST(1600000000), INT64CONST(0), INT64CONST(2100000000), + NULL, NULL, NULL + }, + + { + {"vacuum_multixact_failsafe_age", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Multixact age at which VACUUM should trigger failsafe to avoid a wraparound outage."), + NULL + }, + &vacuum_multixact_failsafe_age, + INT64CONST(1600000000), INT64CONST(0), INT64CONST(2100000000), + NULL, NULL, NULL + }, + + { + /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ + {"autovacuum_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, + gettext_noop("Age at which to autovacuum a table to prevent transaction ID wraparound."), + NULL + }, + &autovacuum_freeze_max_age, + + /* see vacuum_failsafe_age if you change the upper-limit value. 
*/ + INT64CONST(10000000000), INT64CONST(100000), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ + {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, AUTOVACUUM, + gettext_noop("Multixact age at which to autovacuum a table to prevent multixact wraparound."), + NULL + }, + &autovacuum_multixact_freeze_max_age, + INT64CONST(20000000000), INT64CONST(10000), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0.0, 0.0, 0.0, NULL, NULL, NULL diff --git a/src/backend/utils/misc/help_config.c b/src/backend/utils/misc/help_config.c index 61c83f35901..19a316ec99b 100644 --- a/src/backend/utils/misc/help_config.c +++ b/src/backend/utils/misc/help_config.c @@ -33,6 +33,7 @@ typedef union struct config_bool _bool; struct config_real real; struct config_int integer; + struct config_int64 integer8; struct config_string string; struct config_enum _enum; } mixedStruct; @@ -107,7 +108,12 @@ printMixedStruct(mixedStruct *structToPrint) structToPrint->integer.min, structToPrint->integer.max); break; - + case PGC_INT64: + printf("INT64\t%lld\t%lld\t%lld\t", + (long long) structToPrint->integer8.reset_val, + (long long) structToPrint->integer8.min, + (long long) structToPrint->integer8.max); + break; case PGC_REAL: printf("REAL\t%g\t%g\t%g\t", structToPrint->real.reset_val, diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c index 4ab4a0a7014..ffffdeccbb0 100644 --- a/src/backend/utils/misc/pg_controldata.c +++ b/src/backend/utils/misc/pg_controldata.c @@ -165,7 +165,7 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) nulls[5] = false; values[6] = CStringGetTextDatum(psprintf("%llu", - (unsigned long long) U64FromFullTransactionId(ControlFile->checkPointCopy.nextXid))); + (unsigned long long) XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid))); nulls[6] = false; values[7] = 
ObjectIdGetDatum(ControlFile->checkPointCopy.nextOid); diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 48ad80cf2e8..2050ae1c1f4 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -651,9 +651,9 @@ #autovacuum_vacuum_insert_scale_factor = 0.2 # fraction of inserts over table # size before insert vacuum #autovacuum_analyze_scale_factor = 0.1 # fraction of table size before analyze -#autovacuum_freeze_max_age = 200000000 # maximum XID age before forced vacuum +#autovacuum_freeze_max_age = 10000000000 # maximum XID age before forced vacuum # (change requires restart) -#autovacuum_multixact_freeze_max_age = 400000000 # maximum multixact age +#autovacuum_multixact_freeze_max_age = 20000000000 # maximum multixact age # before forced vacuum # (change requires restart) #autovacuum_vacuum_cost_delay = 2ms # default vacuum cost delay for diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 31554fd867d..5db9d0b65fc 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -4269,11 +4269,13 @@ static void writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) { HeapTuple tuple = (HeapTuple) stup->tuple; - unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int); + unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + 2 * sizeof(TransactionId) + sizeof(int); /* We need to store t_self, but not other fields of HeapTupleData */ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeWrite(tape, &tuple->t_xmin, sizeof(TransactionId)); + LogicalTapeWrite(tape, &tuple->t_xmax, sizeof(TransactionId)); LogicalTapeWrite(tape, tuple->t_data, tuple->t_len); if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length * word? 
*/ @@ -4290,7 +4292,7 @@ static void readtup_cluster(Tuplesortstate *state, SortTuple *stup, LogicalTape *tape, unsigned int tuplen) { - unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int); + unsigned int t_len = tuplen - sizeof(ItemPointerData) - 2 * sizeof(TransactionId) - sizeof(int); HeapTuple tuple = (HeapTuple) readtup_alloc(state, t_len + HEAPTUPLESIZE); @@ -4298,6 +4300,8 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup, tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); tuple->t_len = t_len; LogicalTapeReadExact(tape, &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeReadExact(tape, &tuple->t_xmin, sizeof(TransactionId)); + LogicalTapeReadExact(tape, &tuple->t_xmax, sizeof(TransactionId)); /* We don't currently bother to reconstruct t_tableOid */ tuple->t_tableOid = InvalidOid; /* Read in the tuple body */ diff --git a/src/backend/utils/time/combocid.c b/src/backend/utils/time/combocid.c index 6613dc05340..f673624f68b 100644 --- a/src/backend/utils/time/combocid.c +++ b/src/backend/utils/time/combocid.c @@ -101,12 +101,13 @@ static CommandId GetRealCmax(CommandId combocid); */ CommandId -HeapTupleHeaderGetCmin(HeapTupleHeader tup) +HeapTupleGetCmin(HeapTuple tuple) { + HeapTupleHeader tup = tuple->t_data; CommandId cid = HeapTupleHeaderGetRawCommandId(tup); Assert(!(tup->t_infomask & HEAP_MOVED)); - Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tup))); + Assert(TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(tuple))); if (tup->t_infomask & HEAP_COMBOCID) return GetRealCmin(cid); @@ -115,8 +116,9 @@ HeapTupleHeaderGetCmin(HeapTupleHeader tup) } CommandId -HeapTupleHeaderGetCmax(HeapTupleHeader tup) +HeapTupleGetCmax(HeapTuple tuple) { + HeapTupleHeader tup = tuple->t_data; CommandId cid = HeapTupleHeaderGetRawCommandId(tup); Assert(!(tup->t_infomask & HEAP_MOVED)); @@ -128,7 +130,7 @@ HeapTupleHeaderGetCmax(HeapTupleHeader tup) * things too much. 
*/ Assert(CritSectionCount > 0 || - TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tup))); + TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(tuple))); if (tup->t_infomask & HEAP_COMBOCID) return GetRealCmax(cid); @@ -150,7 +152,7 @@ HeapTupleHeaderGetCmax(HeapTupleHeader tup) * changes the tuple in shared buffers. */ void -HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, +HeapTupleHeaderAdjustCmax(HeapTuple tup, CommandId *cmax, bool *iscombo) { @@ -160,10 +162,10 @@ HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, * Test for HeapTupleHeaderXminCommitted() first, because it's cheaper * than a TransactionIdIsCurrentTransactionId call. */ - if (!HeapTupleHeaderXminCommitted(tup) && - TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tup))) + if (!HeapTupleHeaderXminCommitted(tup->t_data) && + TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(tup))) { - CommandId cmin = HeapTupleHeaderGetCmin(tup); + CommandId cmin = HeapTupleGetCmin(tup); *cmax = GetComboCommandId(cmin, *cmax); *iscombo = true; diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 8e03a9f0f44..ad215e8d9ee 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -1174,8 +1174,9 @@ ExportSnapshot(Snapshot snapshot) * Generate file path for the snapshot. We start numbering of snapshots * inside the transaction from 1. 
*/ - snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d", - MyProc->backendId, MyProc->lxid, list_length(exportedSnapshots) + 1); + snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X%08X-%d", + MyProc->backendId, (uint32) (MyProc->lxid >> 32), + (uint32) MyProc->lxid, list_length(exportedSnapshots) + 1); /* * Copy the snapshot into TopTransactionContext, add it to the @@ -1351,7 +1352,7 @@ parseXidFromText(const char *prefix, char **s, const char *filename) (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); ptr += prefixlen; - if (sscanf(ptr, "%u", &val) != 1) + if (sscanf(ptr, "%" INT64_MODIFIER "u", &val) != 1) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); @@ -1376,7 +1377,7 @@ parseVxidFromText(const char *prefix, char **s, const char *filename, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); ptr += prefixlen; - if (sscanf(ptr, "%d/%u", &vxid->backendId, &vxid->localTransactionId) != 2) + if (sscanf(ptr, "%d/%" INT64_MODIFIER "u", &vxid->backendId, &vxid->localTransactionId) != 2) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 62320ec5f76..c8b573e732f 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -1351,7 +1351,7 @@ bootstrap_template1(void) escape_quotes_bki(username)); /* relfrozenxid must not be less than FirstNormalTransactionId */ - sprintf(buf, "%u", Max(start_xid, 3)); + sprintf(buf, "%llu", (unsigned long long) Max(start_xid, 3)); bki_lines = replace_token(bki_lines, "RECENTXMIN", buf); @@ -1374,13 +1374,13 @@ bootstrap_template1(void) unsetenv("PGCLIENTENCODING"); snprintf(cmd, sizeof(cmd), - "\"%s\" --boot -X %d %s %s %u %s %u %s %u %s %s %s", + "\"%s\" --boot -X %d %s %s %llu 
%s %llu %s %llu %s %s %s", backend_exec, wal_segment_size_mb * (1024 * 1024), data_checksums ? "-k" : "", - "-m", start_mxid, - "-o", start_mxoff, - "-x", start_xid, + "-m", (unsigned long long) start_mxid, + "-o", (unsigned long long) start_mxoff, + "-x", (unsigned long long) start_xid, boot_options, extra_options, debug ? "-d 5" : ""); @@ -2202,15 +2202,18 @@ usage(const char *progname) printf(_(" --discard-caches set debug_discard_caches=1\n")); printf(_(" -L DIRECTORY where to find the input files\n")); printf(_(" -m, --multixact-id=START_MXID\n" - " set initial database cluster multixact id\n")); + " set initial database cluster multixact id\n" + " max value is 2^62-1\n")); printf(_(" -n, --no-clean do not clean up after errors\n")); printf(_(" -N, --no-sync do not wait for changes to be written safely to disk\n")); printf(_(" --no-instructions do not print instructions for next steps\n")); printf(_(" -o, --multixact-offset=START_MXOFF\n" - " set initial database cluster multixact offset\n")); + " set initial database cluster multixact offset\n" + " max value is 2^62-1\n")); printf(_(" -s, --show show internal settings\n")); printf(_(" -S, --sync-only only sync database files to disk, then exit\n")); - printf(_(" -x, --xid=START_XID set initial database cluster xid\n")); + printf(_(" -x, --xid=START_XID set initial database cluster xid\n" + " max value is 2^62-1\n")); printf(_("\nOther options:\n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" -?, --help show this help, then exit\n")); @@ -2744,13 +2747,16 @@ initialize_data_directory(void) setup_config(); if (start_mxid != 0) - printf(_("selecting initial multixact id ... %u\n"), start_mxid); + printf(_("selecting initial multixact id ... %llu\n"), + (unsigned long long) start_mxid); if (start_mxoff != 0) - printf(_("selecting initial multixact offset ... %u\n"), start_mxoff); + printf(_("selecting initial multixact offset ... 
%llu\n"), + (unsigned long long) start_mxoff); if (start_xid != 0) - printf(_("selecting initial xid ... %u\n"), start_xid); + printf(_("selecting initial xid ... %llu\n"), + (unsigned long long) start_xid); /* Bootstrap template1 */ bootstrap_template1(); @@ -2768,11 +2774,11 @@ initialize_data_directory(void) fflush(stdout); snprintf(cmd, sizeof(cmd), - "\"%s\" %s %s %s %u %s %u %s %u template1 >%s", + "\"%s\" %s %s %s %llu %s %llu %s %llu template1 >%s", backend_exec, backend_options, extra_options, - "-m", start_mxid, - "-o", start_mxoff, - "-x", start_xid, + "-m", (unsigned long long) start_mxid, + "-o", (unsigned long long) start_mxoff, + "-x", (unsigned long long) start_xid, DEVNULL); PG_CMD_OPEN; @@ -2939,15 +2945,13 @@ main(int argc, char *argv[]) break; case 'm': { - unsigned long value; - char *endptr; + char *endptr; errno = 0; - value = strtoul(optarg, &endptr, 0); - start_mxid = value; + start_mxid = strtoull(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0 || - value != start_mxid) /* overflow */ + !StartMultiXactIdIsValid(start_mxid)) { pg_log_error("invalid initial database cluster multixact id"); exit(1); @@ -2972,15 +2976,13 @@ main(int argc, char *argv[]) break; case 'o': { - unsigned long value; - char *endptr; + char *endptr; errno = 0; - value = strtoul(optarg, &endptr, 0); - start_mxoff = value; + start_mxoff = strtoull(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0 || - value != start_mxoff) /* overflow */ + !StartMultiXactOffsetIsValid(start_mxoff)) { pg_log_error("invalid initial database cluster multixact offset"); exit(1); @@ -3059,15 +3061,13 @@ main(int argc, char *argv[]) break; case 'x': { - unsigned long value; - char *endptr; + char *endptr; errno = 0; - value = strtoul(optarg, &endptr, 0); - start_xid = value; + start_xid = strtoull(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0 || - value != start_xid) /* overflow */ + 
!StartTransactionIdIsValid(start_xid)) { pg_log_error("invalid value for initial database cluster xid"); exit(1); diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl index 9f9ee99cb73..425470e7d7a 100644 --- a/src/bin/initdb/t/001_initdb.pl +++ b/src/bin/initdb/t/001_initdb.pl @@ -140,28 +140,28 @@ command_fails( # Set non-standard initial mxid/mxoff/xid. command_fails_like( - [ 'initdb', '-m', '4294967296', $datadir ], + [ 'initdb', '-m', '9223372036854775807', $datadir ], qr/initdb: error: invalid initial database cluster multixact id/, 'fails for invalid initial database cluster multixact id'); command_fails_like( - [ 'initdb', '-o', '4294967296', $datadir ], + [ 'initdb', '-o', '9223372036854775807', $datadir ], qr/initdb: error: invalid initial database cluster multixact offset/, 'fails for invalid initial database cluster multixact offset'); command_fails_like( - [ 'initdb', '-x', '4294967296', $datadir ], + [ 'initdb', '-x', '9223372036854775807', $datadir ], qr/initdb: error: invalid value for initial database cluster xid/, 'fails for invalid initial database cluster xid'); command_fails_like( - [ 'initdb', '-m', '0x100000000', $datadir ], + [ 'initdb', '-m', '0x10000000000000000', $datadir ], qr/initdb: error: invalid initial database cluster multixact id/, 'fails for invalid initial database cluster multixact id'); command_fails_like( - [ 'initdb', '-o', '0x100000000', $datadir ], + [ 'initdb', '-o', '0x10000000000000000', $datadir ], qr/initdb: error: invalid initial database cluster multixact offset/, 'fails for invalid initial database cluster multixact offset'); command_fails_like( - [ 'initdb', '-x', '0x100000000', $datadir ], + [ 'initdb', '-x', '0x10000000000000000', $datadir ], qr/initdb: error: invalid value for initial database cluster xid/, 'fails for invalid initial database cluster xid'); diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl index 80508111055..f98b0e90483 
100644 --- a/src/bin/pg_amcheck/t/004_verify_heapam.pl +++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl @@ -9,6 +9,8 @@ use PostgreSQL::Test::Utils; use Test::More; +use Data::Dumper; + # This regression test demonstrates that the pg_amcheck binary correctly # identifies specific kinds of corruption within pages. To test this, we need # a mechanism to create corrupt pages with predictable, repeatable corruption. @@ -85,6 +87,60 @@ use Test::More; use constant HEAPTUPLE_PACK_CODE => 'LLLSSSSSCCLLCCCCCCCCCCllLL'; use constant HEAPTUPLE_PACK_LENGTH => 58; # Total size +use constant HEAPPAGE_SPECIAL_PACK_CODE => 'QQ'; +use constant HEAPPAGE_SPECIAL_PACK_LENGTH => 16; +use constant HEAPPAGE_SIZE => 8192; + +# Some #define constants from access/htup_details.h for use while corrupting. +use constant HEAP_HASNULL => 0x0001; +use constant HEAP_XMAX_LOCK_ONLY => 0x0080; +use constant HEAP_XMIN_COMMITTED => 0x0100; +use constant HEAP_XMIN_INVALID => 0x0200; +use constant HEAP_XMAX_COMMITTED => 0x0400; +use constant HEAP_XMAX_INVALID => 0x0800; +use constant HEAP_NATTS_MASK => 0x07FF; +use constant HEAP_XMAX_IS_MULTI => 0x1000; +use constant HEAP_KEYS_UPDATED => 0x2000; + +use constant FIRST_NORMAL_TRANSACTION_ID => 3; + +# Read page special data +sub read_special_data +{ + my ($fh, $offset) = @_; + my ($buffer, %special); + $offset -= $offset % HEAPPAGE_SIZE; + $offset += HEAPPAGE_SIZE - HEAPPAGE_SPECIAL_PACK_LENGTH; + sysseek($fh, $offset, 0) + or BAIL_OUT("sysseek failed: $!"); + defined(sysread($fh, $buffer, HEAPPAGE_SPECIAL_PACK_LENGTH)) + or BAIL_OUT("sysread failed: $!"); + + @_ = unpack(HEAPPAGE_SPECIAL_PACK_CODE, $buffer); + %special = ( + pd_xid_base => shift, + pd_multi_base => shift); + return \%special; +} + +# Write page special data +sub write_special_data +{ + my ($fh, $offset, $special) = @_; + + $offset -= $offset % HEAPPAGE_SIZE; + $offset += HEAPPAGE_SIZE - HEAPPAGE_SPECIAL_PACK_LENGTH; + + my $buffer = pack( + HEAPPAGE_SPECIAL_PACK_CODE, + 
$special->{pd_xid_base}, $special->{pd_multi_base}); + + sysseek($fh, $offset, 0) + or BAIL_OUT("sysseek failed: $!"); + defined(syswrite($fh, $buffer, HEAPPAGE_SPECIAL_PACK_LENGTH)) + or BAIL_OUT("syswrite failed: $!"); + return; +} # Read a tuple of our table from a heap page. # @@ -96,7 +152,7 @@ use constant HEAPTUPLE_PACK_LENGTH => 58; # Total size # sub read_tuple { - my ($fh, $offset) = @_; + my ($fh, $offset, $raw) = @_; my ($buffer, %tup); sysseek($fh, $offset, 0) or BAIL_OUT("sysseek failed: $!"); @@ -133,6 +189,18 @@ sub read_tuple c_va_toastrelid => shift); # Stitch together the text for column 'b' $tup{b} = join('', map { chr($tup{"b_body$_"}) } (1 .. 7)); + + if (!$raw) + { + my $special = read_special_data($fh, $offset); + + $tup{t_xmin} += $special->{pd_xid_base}; + my $is_multi = $tup{t_infomask} & HEAP_XMAX_IS_MULTI; + $tup{t_xmax} += !$is_multi ? + $special->{pd_xid_base} : + $special->{pd_multi_base}; + } + return \%tup; } @@ -148,7 +216,32 @@ sub read_tuple # sub write_tuple { - my ($fh, $offset, $tup) = @_; + my ($fh, $offset, $tup, $raw) = @_; + if (!$raw) + { + my $special = read_special_data($fh, $offset); + + my $xmin = $tup->{t_xmin} - $special->{pd_xid_base}; + die "tuple x_min $tup->{t_xmin} is too small for pd_xid_base $special->{pd_xid_base}" + if $xmin < 3; + $tup->{t_xmin} = $xmin; + + if (($tup->{t_infomask} & HEAP_XMAX_IS_MULTI) == 0) + { + my $xmax = $tup->{t_xmax} - $special->{pd_xid_base}; + die "tuple x_max $tup->{t_xmax} is too small for pd_xid_base $special->{pd_xid_base}" + if $xmax < 3; + $tup->{t_xmax} = $xmax; + } + else + { + my $xmax = $tup->{t_xmax} - $special->{pd_multi_base}; + die "tuple multi x_max $tup->{t_xmax} is too small for pd_multi_base $special->{pd_multi_base}" + if $xmax < 3; + $tup->{t_xmax} = $xmax; + } + } + my $buffer = pack( HEAPTUPLE_PACK_CODE, $tup->{t_xmin}, $tup->{t_xmax}, @@ -171,6 +264,41 @@ sub write_tuple return; } +# move pd_xid_base and pd_multi_base to more suitable position for tests. 
+sub fixup_page +{ + my ($fh, $page, $xid_base, $multi_base, $lp_off) = @_; + my $offset = $page * HEAPPAGE_SIZE; + my $special = read_special_data($fh, $offset); + + die "xid_base $xid_base must not be greater than existing $special->{pd_xid_base}" + if ($xid_base > $special->{pd_xid_base}); + die "multi_base $multi_base must not be greater than existing $special->{pd_multi_base}" + if ($multi_base > $special->{pd_multi_base} && $special->{pd_multi_base} != 0); + return if ($xid_base == $special->{pd_xid_base} && + $multi_base == $special->{pd_multi_base}); + + my $xid_delta = $special->{pd_xid_base} - $xid_base; + my $multi_delta = $special->{pd_multi_base} - $multi_base; + + for my $off (@$lp_off) + { + # change only tuples on this page. + next if ($off < $offset || $off >= $offset + HEAPPAGE_SIZE); + + my $tup = read_tuple($fh, $off, 1); + $tup->{t_xmin} += $xid_delta; + my $is_multi = $tup->{t_infomask} & HEAP_XMAX_IS_MULTI; + $tup->{t_xmax} += !$is_multi ? $xid_delta : $multi_delta; + write_tuple($fh, $off, $tup, 1); + } + + $special->{pd_xid_base} = $xid_base; + $special->{pd_multi_base} = $multi_base; + + write_special_data($fh, $offset, $special); +} + # Set umask so test directories and files are created with default permissions umask(0077); @@ -233,6 +361,10 @@ my $relfrozenxid = $node->safe_psql('postgres', q(select relfrozenxid from pg_class where relname = 'test')); my $datfrozenxid = $node->safe_psql('postgres', q(select datfrozenxid from pg_database where datname = 'postgres')); +my $datminmxid = $node->safe_psql('postgres', + q(select datminmxid from pg_database where datname = 'postgres')); +my $txid_current = $node->safe_psql('postgres', + q(select txid_current())); # Sanity check that our 'test' table has a relfrozenxid newer than the # datfrozenxid for the database, and that the datfrozenxid is greater than the @@ -291,6 +423,11 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++) # Determine endianness of current platform from the 1-byte varlena 
header $ENDIANNESS = $tup->{b_header} == 0x11 ? "little" : "big"; } + +# Set 64bit xid bases a bit in the past therefore we can set xmin/xmax a bit +# in the past +fixup_page($file, 0, $datfrozenxid - 100, $datminmxid - 100, \@lp_off); + close($file) or BAIL_OUT("close failed: $!"); $node->start; @@ -308,17 +445,6 @@ $node->command_ok([ 'pg_amcheck', '-p', $port, 'postgres' ], $node->stop; -# Some #define constants from access/htup_details.h for use while corrupting. -use constant HEAP_HASNULL => 0x0001; -use constant HEAP_XMAX_LOCK_ONLY => 0x0080; -use constant HEAP_XMIN_COMMITTED => 0x0100; -use constant HEAP_XMIN_INVALID => 0x0200; -use constant HEAP_XMAX_COMMITTED => 0x0400; -use constant HEAP_XMAX_INVALID => 0x0800; -use constant HEAP_NATTS_MASK => 0x07FF; -use constant HEAP_XMAX_IS_MULTI => 0x1000; -use constant HEAP_KEYS_UPDATED => 0x2000; - # Helper function to generate a regular expression matching the header we # expect verify_heapam() to return given which fields we expect to be non-null. sub header @@ -367,7 +493,7 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++) if ($offnum == 2) { # Corruptly set xmin < datfrozenxid - my $xmin = 3; + my $xmin = $datfrozenxid - 12; $tup->{t_xmin} = $xmin; $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED; $tup->{t_infomask} &= ~HEAP_XMIN_INVALID; @@ -377,24 +503,24 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++) } elsif ($offnum == 3) { - # Corruptly set xmin < datfrozenxid, further back, noting circularity - # of xid comparison. For a new cluster with epoch = 0, the corrupt - # xmin will be interpreted as in the future - $tup->{t_xmin} = 4026531839; + # Corruptly set xmin > next transaction id. 
+ my $xmin = $relfrozenxid + 1000000; + $tup->{t_xmin} = $xmin; $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED; $tup->{t_infomask} &= ~HEAP_XMIN_INVALID; push @expected, - qr/${$header}xmin 4026531839 equals or exceeds next valid transaction ID \d+/; + qr/${$header}xmin $xmin equals or exceeds next valid transaction ID \d+/; } elsif ($offnum == 4) { - # Corruptly set xmax < relminmxid; - $tup->{t_xmax} = 4026531839; + # Corruptly set xmax > next transaction id. + my $xmax = $relfrozenxid + 1000000; + $tup->{t_xmax} = $xmax; $tup->{t_infomask} &= ~HEAP_XMAX_INVALID; push @expected, - qr/${$header}xmax 4026531839 equals or exceeds next valid transaction ID \d+/; + qr/${$header}xmax $xmax equals or exceeds next valid transaction ID \d+/; } elsif ($offnum == 5) { @@ -497,20 +623,22 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++) # Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI $tup->{t_infomask} |= HEAP_XMAX_COMMITTED; $tup->{t_infomask} |= HEAP_XMAX_IS_MULTI; - $tup->{t_xmax} = 4; + my $xmax = $datminmxid + 1000000; + $tup->{t_xmax} = $xmax; push @expected, - qr/${header}multitransaction ID 4 equals or exceeds next valid multitransaction ID 1/; + qr/${header}multitransaction ID $xmax equals or exceeds next valid multitransaction ID \d+/; } elsif ($offnum == 15) # Last offnum must equal ROWCOUNT { # Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI $tup->{t_infomask} |= HEAP_XMAX_COMMITTED; $tup->{t_infomask} |= HEAP_XMAX_IS_MULTI; - $tup->{t_xmax} = 4000000000; + my $xmax = $datminmxid - 10; + $tup->{t_xmax} = $xmax; push @expected, - qr/${header}multitransaction ID 4000000000 precedes relation minimum multitransaction ID threshold 1/; + qr/${header}multitransaction ID $xmax precedes relation minimum multitransaction ID threshold \d+/; } write_tuple($file, $offset, $tup); } diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index a8a46d5bf03..ffc89b31843 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ 
b/src/bin/pg_controldata/pg_controldata.c @@ -248,7 +248,7 @@ main(int argc, char *argv[]) printf(_("Latest checkpoint's full_page_writes: %s\n"), ControlFile->checkPointCopy.fullPageWrites ? _("on") : _("off")); printf(_("Latest checkpoint's NextXID: %llu\n"), - (unsigned long long) U64FromFullTransactionId(ControlFile->checkPointCopy.nextXid)); + (unsigned long long) XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid)); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile->checkPointCopy.nextOid); printf(_("Latest checkpoint's NextMultiXactId: %llu\n"), diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 68ff771bb27..16cf7e7d4d0 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -2829,7 +2829,7 @@ dumpDatabase(Archive *fout) *datistemplate, *datconnlimit, *tablespace; - uint32 frozenxid, + uint64 frozenxid, minmxid; char *qdatname; @@ -2890,8 +2890,8 @@ dumpDatabase(Archive *fout) iculocale = PQgetvalue(res, 0, i_daticulocale); else iculocale = NULL; - frozenxid = atooid(PQgetvalue(res, 0, i_frozenxid)); - minmxid = atooid(PQgetvalue(res, 0, i_minmxid)); + frozenxid = strtou64(PQgetvalue(res, 0, i_frozenxid), NULL, 0); + minmxid = strtou64(PQgetvalue(res, 0, i_minmxid), NULL, 0); dbdacl.acl = PQgetvalue(res, 0, i_datacl); dbdacl.acldefault = PQgetvalue(res, 0, i_acldefault); datistemplate = PQgetvalue(res, 0, i_datistemplate); @@ -3166,10 +3166,10 @@ dumpDatabase(Archive *fout) appendPQExpBufferStr(loOutQry, "\n-- For binary upgrade, set pg_largeobject relfrozenxid and relminmxid\n"); appendPQExpBuffer(loOutQry, "UPDATE pg_catalog.pg_class\n" - "SET relfrozenxid = '%u', relminmxid = '%u'\n" + "SET relfrozenxid = '%s', relminmxid = '%s'\n" "WHERE oid = %u;\n", - atooid(PQgetvalue(lo_res, 0, i_relfrozenxid)), - atooid(PQgetvalue(lo_res, 0, i_relminmxid)), + (PQgetvalue(lo_res, 0, i_relfrozenxid)), + (PQgetvalue(lo_res, 0, i_relminmxid)), LargeObjectRelationId); ArchiveEntry(fout, nilCatalogId, 
createDumpId(), ARCHIVE_OPTS(.tag = "pg_largeobject", @@ -6421,11 +6421,11 @@ getTables(Archive *fout, int *numTables) tblinfo[i].relreplident = *(PQgetvalue(res, i, i_relreplident)); tblinfo[i].rowsec = (strcmp(PQgetvalue(res, i, i_relrowsec), "t") == 0); tblinfo[i].forcerowsec = (strcmp(PQgetvalue(res, i, i_relforcerowsec), "t") == 0); - tblinfo[i].frozenxid = atooid(PQgetvalue(res, i, i_relfrozenxid)); - tblinfo[i].toast_frozenxid = atooid(PQgetvalue(res, i, i_toastfrozenxid)); + tblinfo[i].frozenxid = strtou64(PQgetvalue(res, i, i_relfrozenxid), NULL, 0); + tblinfo[i].toast_frozenxid = strtou64(PQgetvalue(res, i, i_toastfrozenxid), NULL, 0); tblinfo[i].toast_oid = atooid(PQgetvalue(res, i, i_toastoid)); - tblinfo[i].minmxid = atooid(PQgetvalue(res, i, i_relminmxid)); - tblinfo[i].toast_minmxid = atooid(PQgetvalue(res, i, i_toastminmxid)); + tblinfo[i].minmxid = strtou64(PQgetvalue(res, i, i_relminmxid), NULL, 0); + tblinfo[i].toast_minmxid = strtou64(PQgetvalue(res, i, i_toastminmxid), NULL, 0); tblinfo[i].reloptions = pg_strdup(PQgetvalue(res, i, i_reloptions)); if (PQgetisnull(res, i, i_checkoption)) tblinfo[i].checkoption = NULL; diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 1d21c2906f1..bed20998abb 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -298,11 +298,11 @@ typedef struct _tableInfo bool rowsec; /* is row security enabled? */ bool forcerowsec; /* is row security forced? */ bool hasoids; /* does it have OIDs? 
*/ - uint32 frozenxid; /* table's relfrozenxid */ - uint32 minmxid; /* table's relminmxid */ + uint64 frozenxid; /* table's relfrozenxid */ + uint64 minmxid; /* table's relminmxid */ Oid toast_oid; /* toast table's OID, or 0 if none */ - uint32 toast_frozenxid; /* toast table's relfrozenxid, if any */ - uint32 toast_minmxid; /* toast table's relminmxid */ + uint64 toast_frozenxid; /* toast table's relfrozenxid, if any */ + uint64 toast_minmxid; /* toast table's relminmxid */ int ncheck; /* # of CHECK expressions */ Oid reltype; /* OID of table's composite type, if any */ Oid reloftype; /* underlying type for typed table */ diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index 626f1608bcf..73f733d096b 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -63,7 +63,6 @@ static ControlFileData ControlFile; /* pg_control values */ static XLogSegNo newXlogSegNo; /* new XLOG segment # */ static bool guessed = false; /* T if we had to guess at any values */ static const char *progname; -static uint32 set_xid_epoch = (uint32) -1; static TransactionId set_oldest_xid = 0; static TransactionId set_xid = 0; static TransactionId set_oldest_commit_ts_xid = 0; @@ -95,7 +94,6 @@ main(int argc, char *argv[]) static struct option long_options[] = { {"commit-timestamp-ids", required_argument, NULL, 'c'}, {"pgdata", required_argument, NULL, 'D'}, - {"epoch", required_argument, NULL, 'e'}, {"force", no_argument, NULL, 'f'}, {"next-wal-file", required_argument, NULL, 'l'}, {"multixact-ids", required_argument, NULL, 'm'}, @@ -137,7 +135,7 @@ main(int argc, char *argv[]) } - while ((c = getopt_long(argc, argv, "c:D:e:fl:m:no:O:u:x:", long_options, NULL)) != -1) + while ((c = getopt_long(argc, argv, "c:D:fl:m:no:O:u:x:", long_options, NULL)) != -1) { switch (c) { @@ -153,24 +151,9 @@ main(int argc, char *argv[]) noupdate = true; break; - case 'e': - errno = 0; - set_xid_epoch = strtoul(optarg, &endptr, 0); - if (endptr 
== optarg || *endptr != '\0' || errno != 0) - { - /*------ - translator: the second %s is a command line argument (-e, etc) */ - pg_log_error("invalid argument for option %s", "-e"); - pg_log_error_hint("Try \"%s --help\" for more information.", progname); - exit(1); - } - if (set_xid_epoch == -1) - pg_fatal("transaction ID epoch (-e) must not be -1"); - break; - case 'u': errno = 0; - set_oldest_xid = strtoul(optarg, &endptr, 0); + set_oldest_xid = strtou64(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-u"); @@ -184,7 +167,7 @@ main(int argc, char *argv[]) case 'x': errno = 0; - set_xid = strtoul(optarg, &endptr, 0); + set_xid = strtou64(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-x"); @@ -198,14 +181,14 @@ main(int argc, char *argv[]) case 'c': errno = 0; - set_oldest_commit_ts_xid = strtoul(optarg, &endptr, 0); + set_oldest_commit_ts_xid = strtou64(optarg, &endptr, 0); if (endptr == optarg || *endptr != ',' || errno != 0) { pg_log_error("invalid argument for option %s", "-c"); pg_log_error_hint("Try \"%s --help\" for more information.", progname); exit(1); } - set_newest_commit_ts_xid = strtoul(endptr + 1, &endptr2, 0); + set_newest_commit_ts_xid = strtou64(endptr + 1, &endptr2, 0); if (endptr2 == endptr + 1 || *endptr2 != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-c"); @@ -237,7 +220,7 @@ main(int argc, char *argv[]) case 'm': errno = 0; - set_mxid = strtoul(optarg, &endptr, 0); + set_mxid = strtou64(optarg, &endptr, 0); if (endptr == optarg || *endptr != ',' || errno != 0) { pg_log_error("invalid argument for option %s", "-m"); @@ -245,7 +228,7 @@ main(int argc, char *argv[]) exit(1); } - set_oldestmxid = strtoul(endptr + 1, &endptr2, 0); + set_oldestmxid = strtou64(endptr + 1, &endptr2, 0); if (endptr2 == endptr + 1 || *endptr2 != '\0' || errno != 0) { 
pg_log_error("invalid argument for option %s", "-m"); @@ -265,7 +248,7 @@ main(int argc, char *argv[]) case 'O': errno = 0; - set_mxoff = strtoul(optarg, &endptr, 0); + set_mxoff = strtou64(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-O"); @@ -408,11 +391,6 @@ main(int argc, char *argv[]) * Adjust fields if required by switches. (Do this now so that printout, * if any, includes these values.) */ - if (set_xid_epoch != -1) - ControlFile.checkPointCopy.nextXid = - FullTransactionIdFromEpochAndXid(set_xid_epoch, - XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid)); - if (set_oldest_xid != 0) { ControlFile.checkPointCopy.oldestXid = set_oldest_xid; @@ -420,9 +398,7 @@ main(int argc, char *argv[]) } if (set_xid != 0) - ControlFile.checkPointCopy.nextXid = - FullTransactionIdFromEpochAndXid(EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid), - set_xid); + ControlFile.checkPointCopy.nextXid = FullTransactionIdFromXid(set_xid); if (set_oldest_commit_ts_xid != 0) ControlFile.checkPointCopy.oldestCommitTsXid = set_oldest_commit_ts_xid; @@ -655,7 +631,7 @@ GuessControlValues(void) ControlFile.checkPointCopy.PrevTimeLineID = 1; ControlFile.checkPointCopy.fullPageWrites = false; ControlFile.checkPointCopy.nextXid = - FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); + FullTransactionIdFromXid(FirstNormalTransactionId); ControlFile.checkPointCopy.nextOid = FirstGenbkiObjectId; ControlFile.checkPointCopy.nextMulti = FirstMultiXactId; ControlFile.checkPointCopy.nextMultiOffset = 0; @@ -706,6 +682,8 @@ GuessControlValues(void) * * NB: this display should be just those fields that will not be * reset by RewriteControlFile(). + * + * Special macros help to make translatable strings. 
*/ static void PrintControlValues(bool guessed) @@ -725,8 +703,7 @@ PrintControlValues(bool guessed) ControlFile.checkPointCopy.ThisTimeLineID); printf(_("Latest checkpoint's full_page_writes: %s\n"), ControlFile.checkPointCopy.fullPageWrites ? _("on") : _("off")); - printf(_("Latest checkpoint's NextXID: %u:%llu\n"), - EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid), + printf(_("Latest checkpoint's NextXID: %llu\n"), (unsigned long long) XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid)); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid); @@ -824,12 +801,6 @@ PrintNewControlValues(void) ControlFile.checkPointCopy.oldestXidDB); } - if (set_xid_epoch != -1) - { - printf(_("NextXID epoch: %u\n"), - EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid)); - } - if (set_oldest_commit_ts_xid != 0) { printf(_("oldestCommitTsXid: %llu\n"), @@ -1140,7 +1111,6 @@ usage(void) " set oldest and newest transactions bearing\n" " commit timestamp (zero means no change)\n")); printf(_(" [-D, --pgdata=]DATADIR data directory\n")); - printf(_(" -e, --epoch=XIDEPOCH set next transaction ID epoch\n")); printf(_(" -f, --force force update to be done\n")); printf(_(" -l, --next-wal-file=WALFILE set minimum starting location for new WAL\n")); printf(_(" -m, --multixact-ids=MXID,MXID set next and oldest multitransaction ID\n")); diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile index 80ebe9bd174..971914a8a0c 100644 --- a/src/bin/pg_upgrade/Makefile +++ b/src/bin/pg_upgrade/Makefile @@ -20,6 +20,7 @@ OBJS = \ parallel.o \ pg_upgrade.o \ relfilenode.o \ + segresize.o \ server.o \ tablespace.o \ util.o \ diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index ace7387edaf..5ae76826d7c 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -32,6 +32,7 @@ static void check_for_pg_role_prefix(ClusterInfo *cluster); static void check_for_new_tablespace_dir(ClusterInfo 
*new_cluster); static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster); static char *get_canonical_locale_name(int category, const char *locale); +static void check_for_32bit_xid_usage(ClusterInfo *cluster); /* @@ -160,6 +161,17 @@ check_and_dump_old_cluster(bool live_check) if (GET_MAJOR_VERSION(old_cluster.major_version) <= 903) old_9_3_check_for_line_data_type_usage(&old_cluster); + /* Prepare for 64bit xid */ + if (!ALREADY_64bit_XID(old_cluster)) + { + /* Check if 32-bit xid type is used in tables */ + check_for_32bit_xid_usage(&old_cluster); + /* Check indexes to be upgraded */ + invalidate_spgist_indexes(&old_cluster, true); + invalidate_gin_indexes(&old_cluster, true); + invalidate_external_indexes(&old_cluster, true); + } + /* * While not a check option, we do this now because this is the only time * the old server is running. @@ -236,6 +248,17 @@ issue_warnings_and_set_wal_level(void) if (GET_MAJOR_VERSION(old_cluster.major_version) <= 906) old_9_6_invalidate_hash_indexes(&new_cluster, false); + /* Raindex for 64bit xid */ + if (!ALREADY_64bit_XID(old_cluster)) + { + /* Check if 32-bit xid type is used in tables */ + check_for_32bit_xid_usage(&old_cluster); + /* Check indexes to be upgraded */ + invalidate_spgist_indexes(&old_cluster, true); + invalidate_gin_indexes(&old_cluster, true); + invalidate_external_indexes(&old_cluster, true); + } + report_extension_updates(&new_cluster); stop_postmaster(false); @@ -1378,3 +1401,94 @@ get_canonical_locale_name(int category, const char *locale) return res; } + +/* + * check_for_32bit_xid_usage() + * + * Postgres Pro Enterprise changes xid storage format to 64-bit. Check if + * xid type is used in tables. 
/*
 * check_for_32bit_xid_usage()
 *
 * The on-disk representation of the "xid" data type changes when a cluster
 * is converted to 64-bit transaction IDs, so user tables containing columns
 * of type "xid" cannot be upgraded.  Scan every database of the cluster for
 * such columns; if any are found, write their fully-qualified names to
 * tables_using_xid.txt and abort with pg_fatal().
 *
 * NOTE(review): the fatal message below refers to "Postgres Pro Enterprise";
 * presumably this should name the product this patch targets -- confirm.
 */
static void
check_for_32bit_xid_usage(ClusterInfo *cluster)
{
	int			dbnum;
	FILE	   *script = NULL;	/* report file, opened lazily on first hit */
	bool		found = false;
	char		output_path[MAXPGPATH];

	prep_status("Checking for incompatible \"xid\" data type");

	snprintf(output_path, sizeof(output_path), "tables_using_xid.txt");

	for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++)
	{
		PGresult   *res;
		bool		db_used = false;	/* header line printed for this db? */
		int			ntups;
		int			rowno;
		int			i_nspname,
					i_relname,
					i_attname;
		DbInfo	   *active_db = &cluster->dbarr.dbs[dbnum];
		PGconn	   *conn = connectToServer(cluster, active_db->db_name);

		/*
		 * While several relkinds don't store any data, e.g. views, they can
		 * be used to define data types of other columns, so we check all
		 * relkinds.
		 */
		res = executeQueryOrDie(conn,
								"SELECT n.nspname, c.relname, a.attname "
								"FROM pg_catalog.pg_class c, "
								" pg_catalog.pg_namespace n, "
								" pg_catalog.pg_attribute a "
								"WHERE c.oid = a.attrelid AND "
								" a.attnum >= 1 AND "
								" a.atttypid = 'pg_catalog.xid'::pg_catalog.regtype AND "
								" c.relnamespace = n.oid AND "
		/* exclude possible orphaned temp tables */
								" n.nspname !~ '^pg_temp_' AND "
								" n.nspname NOT IN ('pg_catalog', 'information_schema')");

		ntups = PQntuples(res);
		i_nspname = PQfnumber(res, "nspname");
		i_relname = PQfnumber(res, "relname");
		i_attname = PQfnumber(res, "attname");
		for (rowno = 0; rowno < ntups; rowno++)
		{
			found = true;
			/* open the report file only when there is something to report */
			if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL)
				pg_fatal("could not open file \"%s\": %s\n",
						 output_path, strerror(errno));
			if (!db_used)
			{
				fprintf(script, "Database: %s\n", active_db->db_name);
				db_used = true;
			}
			fprintf(script, " %s.%s.%s\n",
					PQgetvalue(res, rowno, i_nspname),
					PQgetvalue(res, rowno, i_relname),
					PQgetvalue(res, rowno, i_attname));
		}

		PQclear(res);

		PQfinish(conn);
	}

	if (script)
		fclose(script);

	if (found)
	{
		pg_log(PG_REPORT, "fatal\n");
		pg_fatal("Your installation contains the \"xid\" data type in user tables.\n"
				 "The internal format of \"xid\" changed in Postgres Pro Enterprise so this cluster\n"
				 "cannot currently be upgraded. Note that even dropped attributes cause a problem.\n"
				 "You can remove the problem tables and restart the upgrade.\n"
				 "A list of the problem columns is in the file:\n"
				 " %s\n\n", output_path);
	}
	else
		check_ok();
}
/*
 * State shared by the rewriteFile* helpers while one relation fork file is
 * being copied page-by-page from the old cluster into the new one.
 */
typedef struct FileRewriteContext
{
	const char *fromfile;		/* source path (old cluster) */
	const char *tofile;			/* destination path (new cluster) */
	const char *schemaName;		/* relation's schema, for error messages */
	const char *relName;		/* relation's name, for error messages */
	int			src_fd;			/* fd opened read-only on fromfile */
	int			dst_fd;			/* fd opened read-write on tofile */
	ssize_t		src_filesize;	/* total size of fromfile, in bytes */
	ssize_t		totalBytesRead; /* bytes consumed from fromfile so far */
	BlockNumber last_blkno;		/* last block written, or InvalidBlockNumber */
	bool		old_lastblk;	/* was the final source page just read? */
} FileRewriteContext;

/*
 * Initialize context for file rewriting: open the source file read-only,
 * record its size, and create the destination file (which must not already
 * exist).  Any failure is fatal.
 */
static void
rewriteFileInit(FileRewriteContext * cxt,
				const char *fromfile, const char *tofile,
				const char *schemaName, const char *relName)
{
	struct stat statbuf;

	cxt->fromfile = fromfile;
	cxt->tofile = tofile;
	cxt->schemaName = schemaName;
	cxt->relName = relName;
	cxt->totalBytesRead = 0;
	cxt->last_blkno = InvalidBlockNumber;
	cxt->old_lastblk = false;

	/* Open old and new files */
	if ((cxt->src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0)
		pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s\n",
				 schemaName, relName, fromfile, strerror(errno));

	if (fstat(cxt->src_fd, &statbuf) != 0)
		pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s\n",
				 schemaName, relName, fromfile, strerror(errno));

	if ((cxt->dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
							pg_file_create_mode)) < 0)
		pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s\n",
				 schemaName, relName, tofile, strerror(errno));

	/* Save old file size */
	cxt->src_filesize = statbuf.st_size;
}

/* Clean up file rewriting context: close both file descriptors. */
static void
rewriteFileCleanup(FileRewriteContext * cxt)
{
	close(cxt->dst_fd);
	close(cxt->src_fd);
}

/*
 * Read the next BLCKSZ-sized page of the source file into "page".
 *
 * Returns the number of bytes read (always BLCKSZ), or 0 once the whole
 * source file has been consumed.  A failed or partial read is fatal.
 * Maintains cxt->old_lastblk so the caller can tell when the final source
 * page has just been read.
 */
static ssize_t
rewriteFileReadPage(FileRewriteContext * cxt, Page page)
{
	ssize_t		bytesRead;

	if (cxt->totalBytesRead >= cxt->src_filesize)
		return 0;

	if ((bytesRead = read(cxt->src_fd, page, BLCKSZ)) != BLCKSZ)
	{
		if (bytesRead < 0)
			pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
					 cxt->schemaName, cxt->relName, cxt->fromfile, strerror(errno));
		else
			pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"\n",
					 cxt->schemaName, cxt->relName, cxt->fromfile);
	}

	cxt->totalBytesRead += BLCKSZ;
	cxt->old_lastblk = (cxt->totalBytesRead == cxt->src_filesize);

	return bytesRead;
}

/*
 * Write one page to the destination file as block "blkno".
 *
 * Recomputes the page checksum first, if checksums are enabled in the new
 * cluster.  Blocks need not be written strictly sequentially: when "blkno"
 * does not immediately follow the previously written block, we lseek() to
 * the proper position (the && short-circuit skips the seek in the common
 * sequential case, where the fd position is already correct).  Any failure
 * is fatal.
 */
static void
rewriteFileWritePage(FileRewriteContext * cxt, Page page, BlockNumber blkno)
{
	/* Set new checksum for page, if enabled */
	if (new_cluster.controldata.data_checksum_version != 0)
		((PageHeader) page)->pd_checksum = pg_checksum_page(page, blkno);

	/* Write page */
	errno = 0;

	if ((blkno != (cxt->last_blkno == InvalidBlockNumber ? 0 : cxt->last_blkno + 1) &&
		 lseek(cxt->dst_fd, (off_t) BLCKSZ * blkno, SEEK_SET) != (off_t) BLCKSZ * blkno) ||
		write(cxt->dst_fd, page, BLCKSZ) != BLCKSZ)
	{
		/* if write didn't set errno, assume problem is no disk space */
		if (errno == 0)
			errno = ENOSPC;
		pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n",
				 cxt->schemaName, cxt->relName, cxt->tofile, strerror(errno));
	}

	cxt->last_blkno = blkno;
}
/* Save old file size */ - src_filesize = statbuf.st_size; + rewriteFileInit(&cxt, fromfile, tofile, schemaName, relName); /* * Turn each visibility map page into 2 pages one by one. Each new page @@ -208,27 +298,12 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile, * last page is empty, we skip it, mostly to avoid turning one-page * visibility maps for small relations into two pages needlessly. */ - while (totalBytesRead < src_filesize) + while ((bytesRead = rewriteFileReadPage(&cxt, buffer.data)) > 0) { - ssize_t bytesRead; char *old_cur; char *old_break; char *old_blkend; PageHeaderData pageheader; - bool old_lastblk; - - if ((bytesRead = read(src_fd, buffer.data, BLCKSZ)) != BLCKSZ) - { - if (bytesRead < 0) - pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n", - schemaName, relName, fromfile, strerror(errno)); - else - pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"\n", - schemaName, relName, fromfile); - } - - totalBytesRead += BLCKSZ; - old_lastblk = (totalBytesRead == src_filesize); /* Save the page header data */ memcpy(&pageheader, buffer.data, SizeOfPageHeaderData); @@ -253,7 +328,7 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile, memcpy(new_vmbuf.data, &pageheader, SizeOfPageHeaderData); /* Rewriting the last part of the last old page? 
*/ - old_lastpart = old_lastblk && (old_break == old_blkend); + old_lastpart = cxt.old_lastblk && (old_break == old_blkend); new_cur = new_vmbuf.data + SizeOfPageHeaderData; @@ -287,20 +362,7 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile, if (old_lastpart && empty) break; - /* Set new checksum for visibility map page, if enabled */ - if (new_cluster.controldata.data_checksum_version != 0) - ((PageHeader) new_vmbuf.data)->pd_checksum = - pg_checksum_page(new_vmbuf.data, new_blkno); - - errno = 0; - if (write(dst_fd, new_vmbuf.data, BLCKSZ) != BLCKSZ) - { - /* if write didn't set errno, assume problem is no disk space */ - if (errno == 0) - errno = ENOSPC; - pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s\n", - schemaName, relName, tofile, strerror(errno)); - } + rewriteFileWritePage(&cxt, new_vmbuf.data, new_blkno); /* Advance for next new page */ old_break += rewriteVmBytesPerPage; @@ -308,9 +370,7 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile, } } - /* Clean up */ - close(dst_fd); - close(src_fd); + rewriteFileCleanup(&cxt); } void diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index d440be50202..a9ae56217c8 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -44,6 +44,9 @@ #include #endif +#include "access/multixact.h" +#include "access/transam.h" +#include "access/xlog_internal.h" #include "catalog/pg_class_d.h" #include "common/file_perm.h" #include "common/logging.h" @@ -58,6 +61,7 @@ static void copy_xact_xlog_xid(void); static void set_frozenxids(bool minmxid_only); static void make_outputdirs(char *pgdata); static void setup(char *argv0, bool *live_check); +static bool is_xid_wraparound(ClusterInfo *cluster); ClusterInfo old_cluster, new_cluster; @@ -357,7 +361,6 @@ setup(char *argv0, bool *live_check) } } - static void prepare_new_cluster(void) { @@ -503,16 +506,57 @@ create_new_objects(void) check_ok(); /* - * We don't 
have minmxids for databases or relations in pre-9.3 clusters, - * so set those after we have restored the schema. + * Refix datfrozenxid and datminmxid */ if (GET_MAJOR_VERSION(old_cluster.major_version) <= 902) set_frozenxids(true); + else if (ALREADY_64bit_XID(old_cluster) != ALREADY_64bit_XID(new_cluster)) + { + /* + * During upgrade from 32-bit to 64-bit xids save relfrozenxids if + * there was no wraparound in old cluster. Otherwise, reset them to + * FirstNormalTransactionId value. + */ + if (is_xid_wraparound(&old_cluster)) + set_frozenxids(false); + else + set_frozenxids(true); + } /* update new_cluster info now that we have objects in the databases */ get_db_and_rel_infos(&new_cluster); } +/* + * is_xid_wraparound() + * + * Return true if 32-xid cluster had wraparound. + */ +static bool +is_xid_wraparound(ClusterInfo *cluster) +{ + PGconn *conn; + PGresult *res; + bool is_wraparound; + + conn = connectToServer(cluster, "template1"); + + /* + * txid_current is extended with an "epoch" counter, so to check + * wraparound in old 32-xid cluster we cut epoch by casting to int4. + */ + res = executeQueryOrDie(conn, + "SELECT 1 " + "FROM pg_catalog.pg_database, txid_current() tx " + "WHERE (tx %% 4294967295)::bigint <= datfrozenxid::text::bigint " + "LIMIT 1"); + is_wraparound = PQntuples(res) ? true : false; + PQclear(res); + PQfinish(conn); + + return is_wraparound; +} + /* * Delete the given subdirectory contents from the new cluster */ @@ -561,14 +605,32 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir) static void copy_xact_xlog_xid(void) { - /* - * Copy old commit logs to new data dir. pg_clog has been renamed to - * pg_xact in post-10 clusters. - */ - copy_subdir_files(GET_MAJOR_VERSION(old_cluster.major_version) <= 906 ? - "pg_clog" : "pg_xact", - GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ? 
- "pg_clog" : "pg_xact"); + TransactionId next_xid; + +#define GetClogDirName(cluster) \ + GET_MAJOR_VERSION(cluster.major_version) <= 906 ? "pg_clog" : "pg_xact" + + /* Set next xid to 2^32 if we're upgrading from 32 bit postgres */ + next_xid = ALREADY_64bit_XID(old_cluster) == ALREADY_64bit_XID(new_cluster) ? + old_cluster.controldata.chkpnt_nxtxid : + ((TransactionId) 1 << 32); + + if (ALREADY_64bit_XID(old_cluster) == ALREADY_64bit_XID(new_cluster)) + { + /* + * Copy old commit logs to new data dir. pg_clog has been renamed to + * pg_xact in post-10 clusters. + */ + copy_subdir_files(GetClogDirName(old_cluster), GetClogDirName(new_cluster)); + } + else + { + /* Convert commit logs and copy to the new data dir */ + prep_status("Transforming commit log segments"); + convert_xact(psprintf("%s/%s", old_cluster.pgdata, GetClogDirName(old_cluster)), + psprintf("%s/%s", new_cluster.pgdata, GetClogDirName(new_cluster))); + check_ok(); + } prep_status("Setting oldest XID for new cluster"); exec_prog(UTILITY_LOG_FILE, NULL, true, true, @@ -582,19 +644,20 @@ copy_xact_xlog_xid(void) prep_status("Setting next transaction ID and epoch for new cluster"); exec_prog(UTILITY_LOG_FILE, NULL, true, true, "\"%s/pg_resetwal\" -f -x %llu \"%s\"", - new_cluster.bindir, - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid, + new_cluster.bindir, (unsigned long long) next_xid, new_cluster.pgdata); +#ifdef NOT_USED exec_prog(UTILITY_LOG_FILE, NULL, true, true, "\"%s/pg_resetwal\" -f -e %u \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtepoch, new_cluster.pgdata); +#endif /* must reset commit timestamp limits also */ exec_prog(UTILITY_LOG_FILE, NULL, true, true, "\"%s/pg_resetwal\" -f -c %llu,%llu \"%s\"", new_cluster.bindir, - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid, - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid, + (unsigned long long) next_xid, + (unsigned long long) next_xid, new_cluster.pgdata); check_ok(); @@ -607,8 +670,48 @@ 
copy_xact_xlog_xid(void) if (old_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER && new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER) { - copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets"); - copy_subdir_files("pg_multixact/members", "pg_multixact/members"); + uint64 oldest_mxid = old_cluster.controldata.chkpnt_oldstMulti; + uint64 next_mxid = old_cluster.controldata.chkpnt_nxtmulti; + uint64 next_mxoff = old_cluster.controldata.chkpnt_nxtmxoff; + + if (ALREADY_64bit_XID(old_cluster)) + { + copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets"); + copy_subdir_files("pg_multixact/members", "pg_multixact/members"); + } + else + { + MultiXactOffset oldest_mxoff; + + remove_new_subdir("pg_multixact/offsets", false); + oldest_mxoff = convert_multixact_offsets("pg_multixact/offsets", "pg_multixact/offsets"); + + remove_new_subdir("pg_multixact/members", false); + convert_multixact_members("pg_multixact/members", "pg_multixact/members", oldest_mxoff); + + /* + * Handle wraparound if we're upgrading from 32 bit postgres. + * Invalid 0 mxids/offsets are skipped, so 1 becomes 2^32. 
+ */ + if (oldest_mxoff) + { + if (next_mxid < oldest_mxid) + next_mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId; + + if (next_mxoff < oldest_mxoff) + next_mxoff += ((MultiXactOffset) 1 << 32) - 1; + + /* Offsets and members were rewritten, oldest_mxoff = 1 */ + next_mxoff -= oldest_mxoff - 1; + oldest_mxoff = 1; + + /* + * Save converted next_mxid for possible usage in + * set_frozenxids() + */ + old_cluster.controldata.chkpnt_nxtmulti = next_mxid; + } + } prep_status("Setting next multixact ID and offset for new cluster"); @@ -619,9 +722,9 @@ copy_xact_xlog_xid(void) exec_prog(UTILITY_LOG_FILE, NULL, true, true, "\"%s/pg_resetwal\" -O %llu -m %llu,%llu \"%s\"", new_cluster.bindir, - (unsigned long long) old_cluster.controldata.chkpnt_nxtmxoff, - (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti, - (unsigned long long) old_cluster.controldata.chkpnt_oldstMulti, + (unsigned long long) next_mxoff, + (unsigned long long) next_mxid, + (unsigned long long) oldest_mxid, new_cluster.pgdata); check_ok(); } @@ -695,6 +798,8 @@ set_frozenxids(bool minmxid_only) int ntups; int i_datname; int i_datallowconn; + TransactionId frozen_xid; + MultiXactId minmxid; if (!minmxid_only) prep_status("Setting frozenxid and minmxid counters in new cluster"); @@ -703,18 +808,24 @@ set_frozenxids(bool minmxid_only) conn_template1 = connectToServer(&new_cluster, "template1"); + frozen_xid = ALREADY_64bit_XID(old_cluster) == ALREADY_64bit_XID(new_cluster) ? 
+ old_cluster.controldata.chkpnt_nxtxid : + FirstNormalTransactionId; + + minmxid = old_cluster.controldata.chkpnt_nxtmulti; + if (!minmxid_only) /* set pg_database.datfrozenxid */ PQclear(executeQueryOrDie(conn_template1, "UPDATE pg_catalog.pg_database " "SET datfrozenxid = '%llu'", - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid)); + (unsigned long long) frozen_xid)); /* set pg_database.datminmxid */ PQclear(executeQueryOrDie(conn_template1, "UPDATE pg_catalog.pg_database " "SET datminmxid = '%llu'", - (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti)); + (unsigned long long) minmxid)); /* get database names */ dbres = executeQueryOrDie(conn_template1, @@ -754,7 +865,7 @@ set_frozenxids(bool minmxid_only) CppAsString2(RELKIND_RELATION) ", " CppAsString2(RELKIND_MATVIEW) ", " CppAsString2(RELKIND_TOASTVALUE) ")", - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid)); + (unsigned long long) frozen_xid)); /* set pg_class.relminmxid */ PQclear(executeQueryOrDie(conn, @@ -765,7 +876,7 @@ set_frozenxids(bool minmxid_only) CppAsString2(RELKIND_RELATION) ", " CppAsString2(RELKIND_MATVIEW) ", " CppAsString2(RELKIND_TOASTVALUE) ")", - (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti)); + (unsigned long long) minmxid)); PQfinish(conn); /* Reset datallowconn flag */ diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 55de244ac01..3cb9e63874b 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -24,6 +24,7 @@ #define MESSAGE_WIDTH 60 #define GET_MAJOR_VERSION(v) ((v) / 100) +#define ALREADY_64bit_XID(cluster) (GET_MAJOR_VERSION((cluster).major_version) >= 1500) /* contains both global db information and CREATE DATABASE commands */ #define GLOBALS_DUMP_FILE "pg_upgrade_dump_globals.sql" @@ -198,13 +199,13 @@ typedef struct uint32 ctrl_ver; uint32 cat_ver; char nextxlogfile[25]; - uint32 chkpnt_nxtxid; - uint32 chkpnt_nxtepoch; + uint64 chkpnt_nxtxid; + uint32 
chkpnt_nxtepoch; /* for 32bit xids only */ uint32 chkpnt_nxtoid; - uint32 chkpnt_nxtmulti; - uint32 chkpnt_nxtmxoff; - uint32 chkpnt_oldstMulti; - uint32 chkpnt_oldstxid; + uint64 chkpnt_nxtmulti; + uint64 chkpnt_nxtmxoff; + uint64 chkpnt_oldstMulti; + uint64 chkpnt_oldstxid; uint32 align; uint32 blocksz; uint32 largesz; @@ -457,6 +458,10 @@ void old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, void old_11_check_for_sql_identifier_data_type_usage(ClusterInfo *cluster); void report_extension_updates(ClusterInfo *cluster); +void invalidate_spgist_indexes(ClusterInfo *cluster, bool check_mode); +void invalidate_gin_indexes(ClusterInfo *cluster, bool check_mode); +void invalidate_external_indexes(ClusterInfo *cluster, bool check_mode); + /* parallel.c */ void parallel_exec_prog(const char *log_file, const char *opt_log_file, const char *fmt,...) pg_attribute_printf(3, 4); @@ -464,3 +469,9 @@ void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr char *old_pgdata, char *new_pgdata, char *old_tablespace); bool reap_child(bool wait_for_child); + +/* segresize.c */ +void convert_xact(const char *olddir, const char *newdir); +MultiXactOffset convert_multixact_offsets(const char *olddir, const char *newdir); +void convert_multixact_members(const char *olddir, const char *newdir, + MultiXactOffset oldest_mxoff); diff --git a/src/bin/pg_upgrade/segresize.c b/src/bin/pg_upgrade/segresize.c new file mode 100644 index 00000000000..99e2c5ecdea --- /dev/null +++ b/src/bin/pg_upgrade/segresize.c @@ -0,0 +1,586 @@ +/*------------------------------------------------------------------------- + * + * segresize.c + * SLRU segment resize utility from 32bit to 64bit xid format + * + * Copyright (c) 2015-2022, Postgres Professional + * + * IDENTIFICATION + * src/bin/pg_upgrade/segresize.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include "pg_upgrade.h" +#include "access/multixact.h" 
+#include "access/transam.h" + +#define SLRU_PAGES_PER_SEGMENT_OLD 32 +#define SLRU_PAGES_PER_SEGMENT 32 /* Should be equal to value from slru.h */ + +#define CLOG_BITS_PER_XACT 2 +#define CLOG_XACTS_PER_BYTE 4 +#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE) + +typedef uint32 MultiXactId32; +typedef uint32 MultiXactOffset32; +typedef uint32 TransactionId32; + +#define MaxTransactionId32 ((TransactionId32) 0xFFFFFFFF) +#define MaxMultiXactId32 ((MultiXactId32) 0xFFFFFFFF) +#define MaxMultiXactOffset32 ((MultiXactOffset32) 0xFFFFFFFF) + +#define MULTIXACT_OFFSETS_PER_PAGE_OLD (BLCKSZ / sizeof(MultiXactOffset32)) +#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset)) + +#define MXACT_MEMBER_FLAGS_PER_BYTE 1 + +/* 64xid */ +#define MULTIXACT_FLAGBYTES_PER_GROUP 8 +#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \ + (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE) +/* size in bytes of a complete group */ +#define MULTIXACT_MEMBERGROUP_SIZE \ + (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP) +#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE) + +/* 32xid */ +#define MULTIXACT_FLAGBYTES_PER_GROUP_OLD 4 +#define MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD \ + (MULTIXACT_FLAGBYTES_PER_GROUP_OLD * MXACT_MEMBER_FLAGS_PER_BYTE) +/* size in bytes of a complete group */ +#define MULTIXACT_MEMBERGROUP_SIZE_OLD \ + (sizeof(TransactionId32) * MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD + MULTIXACT_FLAGBYTES_PER_GROUP_OLD) +#define MULTIXACT_MEMBERGROUPS_PER_PAGE_OLD (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE_OLD) +#define MULTIXACT_MEMBERS_PER_PAGE_OLD \ + (MULTIXACT_MEMBERGROUPS_PER_PAGE_OLD * MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD) + +typedef struct SLRUSegmentState +{ + const char *dir; + FILE *file; + int64 segno; + int64 pageno; + bool is_empty_segment; +} SLRUSegmentState; + +static char * +slru_filename_old(const char *path, int64 segno) +{ + Assert(segno <= PG_INT32_MAX); + return 
psprintf("%s/%04X", path, (int) segno); +} + +static char * +slru_filename_new(const char *path, int64 segno) +{ + return psprintf("%s/%012llX", path, (long long) segno); +} + +static inline FILE * +open_file(SLRUSegmentState *state, + char * (filename_fn)(const char *path, int64 segno), + char *mode, char *fatal_msg) +{ + char *filename = filename_fn(state->dir, state->segno); + FILE *fd = fopen(filename, mode); + + if (!fd) + pg_fatal(fatal_msg, filename); + + pfree(filename); + + return fd; +} + +static void +close_file(SLRUSegmentState *state, + char * (filename_fn)(const char *path, int64 segno)) +{ + if (state->file != NULL) + { + if (fclose(state->file) != 0) + pg_fatal("could not close file \"%s\": %m", + filename_fn(state->dir, state->segno)); + state->file = NULL; + } +} + +static inline int +read_file(SLRUSegmentState *state, void *buf) +{ + size_t n = fread(buf, sizeof(char), BLCKSZ, state->file); + + if (n != 0) + return n; + + if (ferror(state->file)) + pg_fatal("could not read file \"%s\": %m", + slru_filename_old(state->dir, state->segno)); + + if (!feof(state->file)) + pg_fatal("unknown file read state \"%s\": %m", + slru_filename_old(state->dir, state->segno)); + + close_file(state, slru_filename_old); + + return 0; +} + +static int +read_old_segment_page(SLRUSegmentState *state, void *buf, bool *is_empty) +{ + int n; + + /* Open next segment file, if needed */ + if (!state->file) + { + state->file = open_file(state, slru_filename_old, "rb", + "could not open source file \"%s\": %m"); + + /* Set position to the needed page */ + if (fseek(state->file, state->pageno * BLCKSZ, SEEK_SET)) + close_file(state, slru_filename_old); + + /* + * Skip segment conversion if segment file doesn't exist. + * First segment file should exist in any case. 
+ */ + if (state->segno != 0) + state->is_empty_segment = true; + } + + if (state->file) + { + /* Segment file does exist, read page from it */ + state->is_empty_segment = false; + + /* Try to read BLCKSZ bytes */ + n = read_file(state, buf); + *is_empty = (n == 0); + + /* Zeroing buf tail if needed */ + if (n) + memset((char *) buf + n, 0, BLCKSZ - n); + } + else + { + n = state->is_empty_segment ? + BLCKSZ : /* Skip empty block at the end of segment */ + 0; /* We reached the last segment */ + *is_empty = true; + + if (n) + memset((char *) buf, 0, BLCKSZ); + } + + state->pageno++; + + if (state->pageno >= SLRU_PAGES_PER_SEGMENT_OLD) + { + /* Start new segment */ + state->segno++; + state->pageno = 0; + close_file(state, slru_filename_old); + } + + return n; +} + +static void +write_new_segment_page(SLRUSegmentState *state, void *buf, bool is_empty) +{ + /* + * Create a new segment file if we still didn't. Creation is postponed + * until the first non-empty page is found. This helps not to create + * completely empty segments. + */ + if (!state->file && !is_empty) + { + state->file = open_file(state, slru_filename_new, "wb", + "could not open target file \"%s\": %m"); + + /* Write zeroes to the previously skipped prefix */ + if (state->pageno > 0) + { + char zerobuf[BLCKSZ] = {0}; + + for (int64 i = 0; i < state->pageno; i++) + { + if (fwrite(zerobuf, sizeof(char), BLCKSZ, state->file) != BLCKSZ) + pg_fatal("could not write file \"%s\": %m", + slru_filename_new(state->dir, state->segno)); + } + } + + } + + /* Write page to the new segment (if it was created) */ + if (state->file) + { + if (fwrite(buf, sizeof(char), BLCKSZ, state->file) != BLCKSZ) + pg_fatal("could not write file \"%s\": %m", + slru_filename_new(state->dir, state->segno)); + } + + state->pageno++; + + /* + * Did we reach the maximum page number? 
Then close segment file and + * create a new one on the next iteration + */ + if (state->pageno >= SLRU_PAGES_PER_SEGMENT) + { + state->segno++; + state->pageno = 0; + close_file(state, slru_filename_new); + } +} + +/* + * Convert pg_xact segments. + */ +void +convert_xact(const char *old_subdir, const char *new_subdir) +{ + SLRUSegmentState oldseg = {0}; + SLRUSegmentState newseg = {0}; + TransactionId oldest_xid = old_cluster.controldata.chkpnt_oldstxid; + TransactionId next_xid = old_cluster.controldata.chkpnt_nxtxid; + TransactionId xid; + int64 pageno; + char buf[BLCKSZ] = {0}; + + oldseg.dir = old_subdir; + newseg.dir = new_subdir; + + pageno = oldest_xid / CLOG_XACTS_PER_PAGE; + + oldseg.segno = pageno / SLRU_PAGES_PER_SEGMENT_OLD; + oldseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT_OLD; + + newseg.segno = pageno / SLRU_PAGES_PER_SEGMENT; + newseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT; + + if (next_xid < oldest_xid) + next_xid += (TransactionId) 1 << 32; /* wraparound */ + + /* Copy xid flags reading only needed segment pages */ + for (xid = oldest_xid & ~(CLOG_XACTS_PER_PAGE - 1); + xid <= ((next_xid - 1) & ~(CLOG_XACTS_PER_PAGE - 1)); + xid += CLOG_XACTS_PER_PAGE) + { + bool is_empty; + + /* Handle possible segment wraparound */ + if (oldseg.segno > MaxTransactionId32 / CLOG_XACTS_PER_PAGE / SLRU_PAGES_PER_SEGMENT_OLD) + { + pageno = (MaxTransactionId32 + 1) / CLOG_XACTS_PER_PAGE; + + Assert(oldseg.segno == pageno / SLRU_PAGES_PER_SEGMENT_OLD); + Assert(!oldseg.pageno); + Assert(!oldseg.file); + oldseg.segno = 0; + + Assert(newseg.segno == pageno / SLRU_PAGES_PER_SEGMENT); + Assert(!newseg.pageno); + Assert(!newseg.file); + newseg.segno = 0; + } + + read_old_segment_page(&oldseg, buf, &is_empty); + write_new_segment_page(&newseg, buf, is_empty); + } + + /* Release resources */ + close_file(&oldseg, slru_filename_old); + close_file(&newseg, slru_filename_new); +} + +static inline SLRUSegmentState +create_slru_segment_state(MultiXactId mxid, + int 
offsets_per_page, + int pages_per_segment, + char *dir) +{ + SLRUSegmentState seg = {0}; + int64 n; + + n = mxid / offsets_per_page; + seg.pageno = n % pages_per_segment; + seg.segno = n / pages_per_segment; + seg.dir = dir; + + return seg; +} + +/* + * Convert pg_multixact/offsets segments and return oldest mxid offset. + */ +MultiXactOffset +convert_multixact_offsets(const char *old_subdir, const char *new_subdir) +{ + SLRUSegmentState oldseg, + newseg; + MultiXactOffset32 oldbuf[MULTIXACT_OFFSETS_PER_PAGE_OLD] = {0}; + MultiXactOffset newbuf[MULTIXACT_OFFSETS_PER_PAGE] = {0}; + MultiXactOffset32 oldest_mxoff = 0; + MultiXactId oldest_mxid, + next_mxid, + mxid; + uint64 old_entry, + new_entry; + bool oldest_mxoff_known = false; + + StaticAssertStmt((sizeof(oldbuf) == BLCKSZ && sizeof(newbuf) == BLCKSZ), + "buf should be BLCKSZ"); + + oldest_mxid = old_cluster.controldata.chkpnt_oldstMulti; + + oldseg = create_slru_segment_state(oldest_mxid, + MULTIXACT_OFFSETS_PER_PAGE_OLD, + SLRU_PAGES_PER_SEGMENT_OLD, + psprintf("%s/%s", old_cluster.pgdata, + old_subdir)); + + newseg = create_slru_segment_state(oldest_mxid, + MULTIXACT_OFFSETS_PER_PAGE, + SLRU_PAGES_PER_SEGMENT, + psprintf("%s/%s", new_cluster.pgdata, + new_subdir)); + + old_entry = oldest_mxid % MULTIXACT_OFFSETS_PER_PAGE_OLD; + new_entry = oldest_mxid % MULTIXACT_OFFSETS_PER_PAGE; + + next_mxid = old_cluster.controldata.chkpnt_nxtmulti; + if (next_mxid < oldest_mxid) + next_mxid += (MultiXactId) 1 << 32; /* wraparound */ + + prep_status("Converting old %s to new format", old_subdir); + + /* Copy mxid offsets reading only needed segment pages */ + for (mxid = oldest_mxid; mxid < next_mxid; old_entry = 0) + { + int oldlen; + bool is_empty; + + /* Handle possible segment wraparound */ + if (oldseg.segno > MaxMultiXactId32 / MULTIXACT_OFFSETS_PER_PAGE_OLD / SLRU_PAGES_PER_SEGMENT_OLD) /* 0xFFFF */ + oldseg.segno = 0; + + oldlen = read_old_segment_page(&oldseg, oldbuf, &is_empty); + + if (oldlen == 0 || is_empty) 
+ pg_fatal("cannot read page %lld from segment: %s\n", + (long long) oldseg.pageno, + slru_filename_old(oldseg.dir, oldseg.segno)); + + /* Save oldest mxid offset */ + if (!oldest_mxoff_known) + { + oldest_mxoff = oldbuf[old_entry]; + oldest_mxoff_known = true; + } + + /* Skip wrapped-around invalid MultiXactIds */ + if (mxid == (MultiXactId) 1 << 32) + { + Assert(oldseg.segno == 0); + Assert(oldseg.pageno == 1); + Assert(old_entry == 0); + mxid += FirstMultiXactId; + old_entry = FirstMultiXactId; + } + + /* Copy entries to the new page */ + for (; mxid < next_mxid && old_entry < MULTIXACT_OFFSETS_PER_PAGE_OLD; + mxid++, old_entry++) + { + MultiXactOffset mxoff = oldbuf[old_entry]; + + /* Handle possible offset wraparound (1 becomes 2^32) */ + if (mxoff < oldest_mxoff) + mxoff += ((MultiXactOffset) 1 << 32) - 1; + + /* Subtract oldest_mxoff, so new offsets will start from 1 */ + newbuf[new_entry++] = mxoff - oldest_mxoff + 1; + + if (new_entry >= MULTIXACT_OFFSETS_PER_PAGE) + { + /* Write new page */ + write_new_segment_page(&newseg, newbuf, false); + new_entry = 0; + } + } + } + + /* Write the last incomplete page */ + if (new_entry > 0 || oldest_mxid == next_mxid) + { + memset(&newbuf[new_entry], 0, + sizeof(newbuf[0]) * (MULTIXACT_OFFSETS_PER_PAGE - new_entry)); + write_new_segment_page(&newseg, newbuf, false); + } + + /* Use next_mxoff as oldest_mxoff, if oldest_mxid == next_mxid */ + if (!oldest_mxoff_known) + { + Assert(oldest_mxid == next_mxid); + oldest_mxoff = (MultiXactOffset) old_cluster.controldata.chkpnt_nxtmxoff; + } + + /* Release resources */ + close_file(&oldseg, slru_filename_old); + close_file(&newseg, slru_filename_new); + + pfree((char *) oldseg.dir); + pfree((char *) newseg.dir); + + check_ok(); + + return oldest_mxoff; +} + +/* + * Convert pg_multixact/members segments, offsets will start from 1. 
+ */ +void +convert_multixact_members(const char *old_subdir, const char *new_subdir, + MultiXactOffset oldest_mxoff) +{ + MultiXactOffset next_mxoff, + mxoff; + SLRUSegmentState oldseg, + newseg; + char oldbuf[BLCKSZ] = {0}, + newbuf[BLCKSZ] = {0}; + int newgroup, + newmember; + char *newflag = newbuf; + TransactionId *newxid; + int oldidx, + newidx; + + prep_status("Converting old %s to new format", old_subdir); + + next_mxoff = (MultiXactOffset) old_cluster.controldata.chkpnt_nxtmxoff; + if (next_mxoff < oldest_mxoff) + next_mxoff += (MultiXactOffset) 1 << 32; + + newxid = (TransactionId *) (newflag + MXACT_MEMBER_FLAGS_PER_BYTE * MULTIXACT_MEMBERS_PER_MEMBERGROUP); + + /* Initialize old starting position */ + oldidx = oldest_mxoff % MULTIXACT_MEMBERS_PER_PAGE_OLD; + oldseg = create_slru_segment_state(oldest_mxoff, + MULTIXACT_MEMBERS_PER_PAGE_OLD, + SLRU_PAGES_PER_SEGMENT_OLD, + psprintf("%s/%s", old_cluster.pgdata, + old_subdir)); + + /* Initialize empty new segment */ + newseg = create_slru_segment_state(0, 1, 1, + psprintf("%s/%s", new_cluster.pgdata, + new_subdir)); + + /* Initialize new starting position (skip invalid zero offset) */ + newgroup = 0; + newidx = 1; + newmember = 1; + newflag++; + newxid++; + + /* Iterate through the original directory */ + for (mxoff = oldest_mxoff; mxoff < next_mxoff; oldidx = 0) + { + bool old_is_empty; + int oldlen; + int ngroups; + int oldgroup; + int oldmember; + + oldlen = read_old_segment_page(&oldseg, oldbuf, &old_is_empty); + + if (oldlen == 0 || old_is_empty) + pg_fatal("cannot read page %lld from segment: %s\n", + (long long) oldseg.pageno, + slru_filename_old(oldseg.dir, oldseg.segno)); + + ngroups = oldlen / MULTIXACT_MEMBERGROUP_SIZE_OLD; + + /* Iterate through old member groups */ + for (oldgroup = oldidx / MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD, + oldmember = oldidx % MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD; + oldgroup < ngroups && mxoff < next_mxoff; + oldgroup++, oldmember = 0) + { + char *oldflag = (char *) 
oldbuf + oldgroup * MULTIXACT_MEMBERGROUP_SIZE_OLD; + TransactionId32 *oldxid = (TransactionId32 *) (oldflag + MULTIXACT_FLAGBYTES_PER_GROUP_OLD); + + oldxid += oldmember; + oldflag += oldmember; + + /* Iterate through old members */ + for (int i = 0; + i < MULTIXACT_MEMBERS_PER_MEMBERGROUP_OLD && mxoff < next_mxoff; + i++) + { + /* Copy member's xid and flags to the new page */ + *newflag++ = *oldflag++; + *newxid++ = (TransactionId) * oldxid++; + + newidx++; + oldidx++; + mxoff++; + + if (++newmember >= MULTIXACT_MEMBERS_PER_MEMBERGROUP) + { + /* Start next member group */ + newmember = 0; + + if (++newgroup >= MULTIXACT_MEMBERGROUPS_PER_PAGE) + { + /* Write current page and start new */ + newgroup = 0; + newidx = 0; + write_new_segment_page(&newseg, newbuf, false); + memset(newbuf, 0, BLCKSZ); + } + + newflag = (char *) newbuf + newgroup * MULTIXACT_MEMBERGROUP_SIZE; + newxid = (TransactionId *) (newflag + MXACT_MEMBER_FLAGS_PER_BYTE * MULTIXACT_MEMBERS_PER_MEMBERGROUP); + } + + /* Handle offset wraparound */ + if (mxoff > MaxMultiXactOffset32) + { + Assert(mxoff == (MultiXactOffset) 1 << 32); + Assert(oldseg.segno == MaxMultiXactOffset32 / MULTIXACT_MEMBERS_PER_PAGE_OLD / SLRU_PAGES_PER_SEGMENT_OLD); + Assert(oldseg.pageno == MaxMultiXactOffset32 / MULTIXACT_MEMBERS_PER_PAGE_OLD % SLRU_PAGES_PER_SEGMENT_OLD); + Assert(oldmember == MaxMultiXactOffset32 % MULTIXACT_MEMBERS_PER_PAGE_OLD); + + /* Switch to segment 0000 */ + close_file(&oldseg, slru_filename_old); + oldseg.segno = 0; + oldseg.pageno = 0; + + oldidx = 1; /* skip invalid zero mxid offset */ + } + } + } + } + + /* Write last page, unless it is empty */ + if (newflag > (char *) newbuf || oldest_mxoff == next_mxoff) + write_new_segment_page(&newseg, newbuf, false); + + /* Release resources */ + close_file(&oldseg, slru_filename_old); + close_file(&newseg, slru_filename_new); + + pfree((char *) oldseg.dir); + pfree((char *) newseg.dir); + + check_ok(); +} diff --git 
a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index 3f11540e189..3c1a4dee7dd 100644 --- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -60,7 +60,7 @@ my $oldnode = # To increase coverage of non-standard segment size and group access without # increasing test runtime, run these tests with a custom setting. # --allow-group-access and --wal-segsize have been added in v11. -$oldnode->init(extra => [ '--wal-segsize', '1', '--allow-group-access' ]); +$oldnode->init(extra => [ '--wal-segsize', '1', '--allow-group-access', '-x', '21000000000' ]); $oldnode->start; # The default location of the source code is the root of this directory. @@ -145,9 +145,17 @@ if (defined($ENV{oldinstall})) 'ran adapt script'); } +$oldnode->safe_psql('regression', + "CREATE TABLE t1 (id SERIAL NOT NULL PRIMARY KEY, plt text, pln NUMERIC(8, 4)); + INSERT INTO t1 (plt, pln) SELECT md5(random()::text), random() * 9999 FROM generate_series(1, 1000);"); +my $relfrozenxid = $oldnode->safe_psql('regression', + "SELECT relfrozenxid FROM pg_class WHERE relname = 't1';"); +my $relminmxid = $oldnode->safe_psql('regression', + "SELECT relminmxid FROM pg_class WHERE relname = 't1';"); + # Initialize a new node for the upgrade. my $newnode = PostgreSQL::Test::Cluster->new('new_node'); -$newnode->init(extra => [ '--wal-segsize', '1', '--allow-group-access' ]); +$newnode->init(extra => [ '--wal-segsize', '1', '--allow-group-access', '-x', '21000000000' ]); my $newbindir = $newnode->config_data('--bindir'); my $oldbindir = $oldnode->config_data('--bindir'); @@ -261,6 +269,16 @@ ok( !-d $newnode->data_dir . 
"/pg_upgrade_output.d", $newnode->start; +my $relfrozenxid_new = $newnode->safe_psql('regression', + "SELECT relfrozenxid FROM pg_class WHERE relname = 't1';"); + +is($relfrozenxid_new, $relfrozenxid, 'old and new relfrozenxid match after pg_upgrade'); + +my $relminmxid_new = $newnode->safe_psql('regression', + "SELECT relminmxid FROM pg_class WHERE relname = 't1';"); + +is($relminmxid_new, $relminmxid, 'old and new relminmxid match after pg_upgrade'); + # Check if there are any logs coming from pg_upgrade, that would only be # retained on failure. my $log_path = $newnode->data_dir . "/pg_upgrade_output.d"; diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c index c694558c3d6..10dcd603371 100644 --- a/src/bin/pg_upgrade/version.c +++ b/src/bin/pg_upgrade/version.c @@ -9,6 +9,7 @@ #include "postgres_fe.h" +#include "access/transam.h" #include "catalog/pg_class_d.h" #include "fe_utils/string_utils.h" #include "pg_upgrade.h" @@ -238,19 +239,21 @@ old_9_6_check_for_unknown_data_type_usage(ClusterInfo *cluster) } /* - * old_9_6_invalidate_hash_indexes() - * 9.6 -> 10 - * Hash index binary format has changed from 9.6->10.0 + * invalidate_indexes() + * Invalidates all indexes satisfying given predicate. 
*/ -void -old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) +static void +invalidate_indexes(ClusterInfo *cluster, bool check_mode, + const char *name, const char *pred) { int dbnum; FILE *script = NULL; bool found = false; - char *output_path = "reindex_hash.sql"; + char output_path[MAXPGPATH]; + + snprintf(output_path, sizeof(output_path), "reindex_%s.sql", name); - prep_status("Checking for hash indexes"); + prep_status("Checking for %s indexes", name); for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) { @@ -263,9 +266,16 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; PGconn *conn = connectToServer(cluster, active_db->db_name); - /* find hash indexes */ - res = executeQueryOrDie(conn, - "SELECT n.nspname, c.relname " + + /* + * Find indexes satisfying predicate. + * + * System indexes (with oids < FirstNormalObjectId) are excluded from + * the search as they are recreated in the new cluster during initdb. + */ + res = executeQueryOrDie( + conn, + "SELECT n.nspname, c.relname, i.indexrelid " "FROM pg_catalog.pg_class c, " " pg_catalog.pg_index i, " " pg_catalog.pg_am a, " @@ -273,8 +283,11 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) "WHERE i.indexrelid = c.oid AND " " c.relam = a.oid AND " " c.relnamespace = n.oid AND " - " a.amname = 'hash'" - ); + " i.indexrelid >= '%u'::pg_catalog.oid AND " + " %s " + "ORDER BY i.indexrelid ASC", + FirstNormalObjectId, + pred); ntups = PQntuples(res); i_nspname = PQfnumber(res, "nspname"); @@ -307,8 +320,14 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) if (!check_mode && db_used) { - /* mark hash indexes as invalid */ - PQclear(executeQueryOrDie(conn, + /* + * Mark indexes satisfying predicate as invalid. + * + * System indexes (with oids < FirstNormalObjectId) are excluded + * from the search (see above). 
+ */ + PQclear(executeQueryOrDie( + conn, "UPDATE pg_catalog.pg_index i " "SET indisvalid = false " "FROM pg_catalog.pg_class c, " @@ -317,7 +336,10 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) "WHERE i.indexrelid = c.oid AND " " c.relam = a.oid AND " " c.relnamespace = n.oid AND " - " a.amname = 'hash'")); + " i.indexrelid >= '%u'::pg_catalog.oid AND " + " %s", + FirstNormalObjectId, + pred)); } PQfinish(conn); @@ -331,24 +353,37 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) report_status(PG_WARNING, "warning"); if (check_mode) pg_log(PG_WARNING, "\n" - "Your installation contains hash indexes. These indexes have different\n" + "Your installation contains %s indexes. These indexes have different\n" "internal formats between your old and new clusters, so they must be\n" "reindexed with the REINDEX command. After upgrading, you will be given\n" - "REINDEX instructions.\n\n"); + "REINDEX instructions.\n\n", + name); else pg_log(PG_WARNING, "\n" - "Your installation contains hash indexes. These indexes have different\n" + "Your installation contains %s indexes. These indexes have different\n" "internal formats between your old and new clusters, so they must be\n" "reindexed with the REINDEX command. The file\n" " %s\n" "when executed by psql by the database superuser will recreate all invalid\n" "indexes; until then, none of these indexes will be used.\n\n", + name, output_path); } else check_ok(); } +/* + * old_9_6_invalidate_hash_indexes() + * 9.6 -> 10 + * Hash index binary format has changed from 9.6->10.0 + */ +void +old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "hash", "a.amname = 'hash'"); +} + /* * old_11_check_for_sql_identifier_data_type_usage() * 11 -> 12 @@ -458,3 +493,36 @@ report_extension_updates(ClusterInfo *cluster) else check_ok(); } + +/* + * invalidate_spgist_indexes() + * 32bit -> 64bit + * SP-GIST contains xids. 
+ */ +void +invalidate_spgist_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "spgist", "a.amname = 'spgist'"); +} + +/* + * invalidate_gin_indexes() + * 32bit -> 64bit + * Gin indexes contains xids in deleted pages. + */ +void +invalidate_gin_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "gin", "a.amname = 'gin'"); +} + +/* + * invalidate_external_indexes() + * Generate script to REINDEX non standard external indexes (like RUM etc) + */ +void +invalidate_external_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "external", + "NOT a.amname IN ('btree', 'hash', 'gist', 'gin', 'spgist', 'brin')"); +} diff --git a/src/bin/pg_verifybackup/t/003_corruption.pl b/src/bin/pg_verifybackup/t/003_corruption.pl index f1ceb4a4bd1..f4109471acb 100644 --- a/src/bin/pg_verifybackup/t/003_corruption.pl +++ b/src/bin/pg_verifybackup/t/003_corruption.pl @@ -174,7 +174,7 @@ sub mutilate_extra_tablespace_file sub mutilate_missing_file { my ($backup_path) = @_; - my $pathname = "$backup_path/pg_xact/000000000000"; + my $pathname = "$backup_path/pg_xact/000000123000"; unlink($pathname) || die "$pathname: $!"; return; } diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index b3ed4dc2f65..7fce9477aa5 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -919,7 +919,7 @@ main(int argc, char **argv) config.filter_by_fpw = true; break; case 'x': - if (sscanf(optarg, "%u", &config.filter_by_xid) != 1) + if (sscanf(optarg, "%" INT64_MODIFIER "u", &config.filter_by_xid) != 1) { pg_log_error("invalid transaction ID specification: \"%s\"", optarg); diff --git a/src/include/access/clog.h b/src/include/access/clog.h index 543f2e2643a..73bc172309a 100644 --- a/src/include/access/clog.h +++ b/src/include/access/clog.h @@ -31,7 +31,7 @@ typedef int XidStatus; typedef struct xl_clog_truncate { - int pageno; + 
int64 pageno; TransactionId oldestXact; Oid oldestXactDb; } xl_clog_truncate; diff --git a/src/include/access/ginblock.h b/src/include/access/ginblock.h index 9347f464f34..4db042c3196 100644 --- a/src/include/access/ginblock.h +++ b/src/include/access/ginblock.h @@ -133,8 +133,15 @@ typedef struct GinMetaPageData * We should reclaim deleted page only once every transaction started before * its deletion is over. */ -#define GinPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid ) -#define GinPageSetDeleteXid(page, xid) ( ((PageHeader) (page))->pd_prune_xid = xid) +#define GinPageGetDeleteXid(page) ( \ + (((PageHeader) (page))->pd_upper == BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId)) ? \ + *((TransactionId *) ((char *) (page) + BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId))) : \ + InvalidTransactionId ) +#define GinPageSetDeleteXid(page, xid) \ + do { \ + ((PageHeader) (page))->pd_upper = BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId); \ + *((TransactionId *) ((char *) (page) + BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId))) = xid; \ + } while (false) extern bool GinPageIsRecyclable(Page page); /* diff --git a/src/include/access/gist.h b/src/include/access/gist.h index a3337627b8f..41de3052fdd 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -223,7 +223,7 @@ GistPageGetDeleteXid(Page page) return ((GISTDeletedPageContents *) PageGetContents(page))->deleteXid; } else - return FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); + return FullTransactionIdFromXid(FirstNormalTransactionId); } /* diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index abf62d9df79..843be91f0c6 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -146,6 +146,9 @@ extern void ReleaseBulkInsertStatePin(BulkInsertState bistate); extern void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate); +extern bool 
heap_page_prepare_for_xid(Relation relation, Buffer buffer, + TransactionId xid, bool multi); +extern void rewrite_page_prepare_for_xid(Page page, HeapTuple tup); extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate); @@ -164,14 +167,14 @@ extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple, Buffer *buffer, struct TM_FailureData *tmfd); extern void heap_inplace_update(Relation relation, HeapTuple tuple); -extern bool heap_freeze_tuple(HeapTupleHeader tuple, +extern bool heap_freeze_tuple(HeapTuple tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, TransactionId cutoff_multi); -extern bool heap_tuple_would_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid, +extern bool heap_tuple_would_freeze(HeapTuple htup, TransactionId cutoff_xid, MultiXactId cutoff_multi, TransactionId *relfrozenxid_out, MultiXactId *relminmxid_out); -extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); +extern bool heap_tuple_needs_eventual_freeze(HeapTuple htup); extern void simple_heap_insert(Relation relation, HeapTuple tup); extern void simple_heap_delete(Relation relation, ItemPointer tid); @@ -189,11 +192,13 @@ extern int heap_page_prune(Relation relation, Buffer buffer, TransactionId old_snap_xmin, TimestampTz old_snap_ts_ts, int *nnewlpdead, - OffsetNumber *off_loc); + OffsetNumber *off_loc, + bool repairFragmentation); extern void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, - OffsetNumber *nowunused, int nunused); + OffsetNumber *nowunused, int nunused, + bool repairFragmentation); extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets); /* in heap/vacuumlazy.c */ @@ -212,7 +217,7 @@ extern HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple stup, Buffer buffer TransactionId *dead_after); extern void 
HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid); -extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple); +extern bool HeapTupleIsOnlyLocked(HeapTuple htup); extern bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot); extern bool HeapTupleIsSurelyDead(HeapTuple htup, struct GlobalVisState *vistest); diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index 2d8a7f62706..bc6e08800d2 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -59,6 +59,8 @@ #define XLOG_HEAP2_LOCK_UPDATED 0x60 #define XLOG_HEAP2_NEW_CID 0x70 +#define XLOG_HEAP3_BASE_SHIFT 0x00 + /* * xl_heap_insert/xl_heap_multi_insert flag values, 8 bits are available. */ @@ -389,7 +391,16 @@ typedef struct xl_heap_rewrite_mapping XLogRecPtr start_lsn; /* Insert LSN at begin of rewrite */ } xl_heap_rewrite_mapping; -extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple, +/* shift the base of xids on heap page */ +typedef struct xl_heap_base_shift +{ + int64 delta; /* delta value to shift the base */ + bool multi; /* true to shift multixact base */ +} xl_heap_base_shift; + +#define SizeOfHeapBaseShift (offsetof(xl_heap_base_shift, multi) + sizeof(bool)) + +extern void HeapTupleHeaderAdvanceLatestRemovedXid(HeapTuple tuple, TransactionId *latestRemovedXid); extern void heap_redo(XLogReaderState *record); @@ -399,12 +410,15 @@ extern void heap_mask(char *pagedata, BlockNumber blkno); extern void heap2_redo(XLogReaderState *record); extern void heap2_desc(StringInfo buf, XLogReaderState *record); extern const char *heap2_identify(uint8 info); +extern void heap3_redo(XLogReaderState *record); +extern void heap3_desc(StringInfo buf, XLogReaderState *record); +extern const char *heap3_identify(uint8 info); extern void heap_xlog_logical_rewrite(XLogReaderState *r); extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid, 
xl_heap_freeze_tuple *tuples, int ntuples); -extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, +extern bool heap_prepare_freeze_tuple(HeapTuple htup, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId cutoff_xid, @@ -413,8 +427,10 @@ extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, bool *totally_frozen, TransactionId *relfrozenxid_out, MultiXactId *relminmxid_out); -extern void heap_execute_freeze_tuple(HeapTupleHeader tuple, +extern void heap_execute_freeze_tuple(HeapTuple tuple, xl_heap_freeze_tuple *xlrec_tp); +extern void heap_execute_freeze_tuple_page(Page page, HeapTupleHeader htup, + xl_heap_freeze_tuple *xlrec_tp); extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer, TransactionId cutoff_xid, uint8 flags); diff --git a/src/include/access/heaptoast.h b/src/include/access/heaptoast.h index a75699054af..8c599ad7d90 100644 --- a/src/include/access/heaptoast.h +++ b/src/include/access/heaptoast.h @@ -22,7 +22,7 @@ */ #define MaximumBytesPerTuple(tuplesPerPage) \ MAXALIGN_DOWN((BLCKSZ - \ - MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData))) \ + MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData)) - MAXALIGN(sizeof(HeapPageSpecialData))) \ / (tuplesPerPage)) /* diff --git a/src/include/access/htup.h b/src/include/access/htup.h index a4bc7256ed5..bd77a018f5e 100644 --- a/src/include/access/htup.h +++ b/src/include/access/htup.h @@ -54,6 +54,12 @@ typedef MinimalTupleData *MinimalTuple; * this can't be told apart from case #1 by inspection; code setting up * or destroying this representation has to know what it's doing. * + * t_xmin and t_xmax are TransactionId values stored in heap tuple header. + * Normally they are calculated from ShortTransactionId-sized on-disk tuple + * xmin/xmax representation: + * t_data->t_choice.t_heap.t_xmin/t_data->t_choice.t_heap.t_xmax + * and pd_xid_base and pd_multi_base common values for all tuples on a page.
+ * * t_len should always be valid, except in the pointer-to-nothing case. * t_self and t_tableOid should be valid if the HeapTupleData points to * a disk buffer, or if it represents a copy of a tuple on disk. They @@ -61,10 +67,12 @@ typedef MinimalTupleData *MinimalTuple; */ typedef struct HeapTupleData { + TransactionId t_xmin; /* precalculated 64-bit xmin */ + TransactionId t_xmax; /* precalculated 64-bit xmax */ uint32 t_len; /* length of *t_data */ ItemPointerData t_self; /* SelfItemPointer */ Oid t_tableOid; /* table the tuple came from */ -#define FIELDNO_HEAPTUPLEDATA_DATA 3 +#define FIELDNO_HEAPTUPLEDATA_DATA 5 HeapTupleHeader t_data; /* -> tuple header and data */ } HeapTupleData; @@ -78,12 +86,12 @@ typedef HeapTupleData *HeapTuple; #define HeapTupleIsValid(tuple) PointerIsValid(tuple) /* HeapTupleHeader functions implemented in utils/time/combocid.c */ -extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup); -extern CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup); -extern void HeapTupleHeaderAdjustCmax(HeapTupleHeader tup, +extern CommandId HeapTupleGetCmin(HeapTuple tup); +extern CommandId HeapTupleGetCmax(HeapTuple tup); +extern void HeapTupleHeaderAdjustCmax(HeapTuple tup, CommandId *cmax, bool *iscombo); /* Prototype for HeapTupleHeader accessors in heapam.c */ -extern TransactionId HeapTupleGetUpdateXid(HeapTupleHeader tuple); +extern TransactionId HeapTupleGetUpdateXid(HeapTuple tuple); #endif /* HTUP_H */ diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h index 51a60eda088..db523205463 100644 --- a/src/include/access/htup_details.h +++ b/src/include/access/htup_details.h @@ -120,13 +120,13 @@ typedef struct HeapTupleFields { - TransactionId t_xmin; /* inserting xact ID */ - TransactionId t_xmax; /* deleting or locking xact ID */ + ShortTransactionId t_xmin; /* inserting xact ID */ + ShortTransactionId t_xmax; /* deleting or locking xact ID */ union { CommandId t_cid; /* inserting or deleting
command ID, or both */ - TransactionId t_xvac; /* old-style VACUUM FULL xact ID */ + ShortTransactionId t_xvac; /* old-style VACUUM FULL xact ID */ } t_field3; } HeapTupleFields; @@ -222,7 +222,7 @@ struct HeapTupleHeaderData * HEAP_XMAX_LOCK_ONLY bit is set; or, for pg_upgrade's sake, if the Xmax is * not a multi and the EXCL_LOCK bit is set. * - * See also HeapTupleHeaderIsOnlyLocked, which also checks for a possible + * See also HeapTupleIsOnlyLocked, which also checks for a possible * aborted updater transaction. * * Beware of multiple evaluations of the argument. @@ -298,27 +298,79 @@ struct HeapTupleHeaderData */ /* - * HeapTupleHeaderGetRawXmin returns the "raw" xmin field, which is the xid + * Copy base values for xid and multixacts from one heap tuple to heap tuple. + * Should be called on tuple copy or making dest tuple on the basis of src tuple + * saving visibility information. + */ +#define HeapTupleCopyBase(dest, src) \ { \ (dest)->t_xmin = (src)->t_xmin; \ (dest)->t_xmax = (src)->t_xmax; \ } + +/* + * Set base values for tuple xids/multixacts to zero. Used when visibility + * information is negligible or will be set later. + */ +#define HeapTupleSetZeroBase(tup) \ { \ (tup)->t_xmin = 0; \ (tup)->t_xmax = 0; \ } + +/* + * Copy raw xmin/xmax from the tuple header without applying the page base. + */ +#define HeapTupleCopyHeaderXids(tup) \ { \ (tup)->t_xmin = (tup)->t_data->t_choice.t_heap.t_xmin; \ (tup)->t_xmax = (tup)->t_data->t_choice.t_heap.t_xmax; \ } + +/* + * Macros for accessing "double xmax". On pg_upgraded instances, it might + * happen that we can't fit new special area to the page. But we still + * might need to write xmax of tuples for updates and deletes. The trick is + * that we actually don't need xmin field. After pg_upgrade (which implies + * restart) no insertions went to this page yet (otherwise special area could + * fit). So, if tuple is visible (otherwise it would be deleted), then it's + * visible for everybody. Thus, t_xmin isn't needed.
Therefore, we can use + * both t_xmin and t_xmax to store 64-bit xmax. + * + * See heap_convert.c for details. + */ +#define HeapTupleHeaderGetDoubleXmax(tup) \ + ((TransactionId)(tup)->t_choice.t_heap.t_xmax + \ + ((TransactionId)(tup)->t_choice.t_heap.t_xmin << 32)) + +#define HeapTupleHeaderSetDoubleXmax(tup, xid) \ +do { \ + (tup)->t_choice.t_heap.t_xmax = (TransactionId) (xid) & 0xFFFFFFFF; \ + (tup)->t_choice.t_heap.t_xmin = ((TransactionId) (xid) >> 32) & 0xFFFFFFFF; \ +} while (0) + +/* + * HeapTupleGetRawXmin returns the "raw" xmin field, which is the xid * originally used to insert the tuple. However, the tuple might actually * be frozen (via HeapTupleHeaderSetXminFrozen) in which case the tuple's xmin * is visible to every snapshot. Prior to PostgreSQL 9.4, we actually changed * the xmin to FrozenTransactionId, and that value may still be encountered * on disk. */ -#define HeapTupleHeaderGetRawXmin(tup) \ -( \ - (tup)->t_choice.t_heap.t_xmin \ -) +#define HeapTupleGetRawXmin(tup) ((tup)->t_xmin) -#define HeapTupleHeaderGetXmin(tup) \ +#define HeapTupleGetXmin(tup) \ ( \ - HeapTupleHeaderXminFrozen(tup) ? \ - FrozenTransactionId : HeapTupleHeaderGetRawXmin(tup) \ + HeapTupleHeaderXminFrozen((tup)->t_data) ? 
\ + FrozenTransactionId : HeapTupleGetRawXmin(tup) \ ) -#define HeapTupleHeaderSetXmin(tup, xid) \ +#define HeapTupleSetXmin(tup, xid) ((tup)->t_xmin = (xid)) + +#define HeapTupleHeaderSetXmin(page, tup) \ ( \ - (tup)->t_choice.t_heap.t_xmin = (xid) \ + AssertMacro(!HeapPageIsDoubleXmax(page)), \ + (tup)->t_data->t_choice.t_heap.t_xmin = NormalTransactionIdToShort(HeapPageGetSpecial(page)->pd_xid_base, (tup)->t_xmin) \ ) #define HeapTupleHeaderXminCommitted(tup) \ @@ -337,18 +389,6 @@ struct HeapTupleHeaderData ((tup)->t_infomask & (HEAP_XMIN_FROZEN)) == HEAP_XMIN_FROZEN \ ) -#define HeapTupleHeaderSetXminCommitted(tup) \ -( \ - AssertMacro(!HeapTupleHeaderXminInvalid(tup)), \ - ((tup)->t_infomask |= HEAP_XMIN_COMMITTED) \ -) - -#define HeapTupleHeaderSetXminInvalid(tup) \ -( \ - AssertMacro(!HeapTupleHeaderXminCommitted(tup)), \ - ((tup)->t_infomask |= HEAP_XMIN_INVALID) \ -) - #define HeapTupleHeaderSetXminFrozen(tup) \ ( \ AssertMacro(!HeapTupleHeaderXminInvalid(tup)), \ @@ -362,30 +402,47 @@ struct HeapTupleHeaderData * to resolve the MultiXactId if necessary. This might involve multixact I/O, * so it should only be used if absolutely necessary. */ -#define HeapTupleHeaderGetUpdateXid(tup) \ +#define HeapTupleGetUpdateXidAny(tup) \ ( \ - (!((tup)->t_infomask & HEAP_XMAX_INVALID) && \ - ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) && \ - !((tup)->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \ + (!((tup)->t_data->t_infomask & HEAP_XMAX_INVALID) && \ + ((tup)->t_data->t_infomask & HEAP_XMAX_IS_MULTI) && \ + !((tup)->t_data->t_infomask & HEAP_XMAX_LOCK_ONLY)) ? \ HeapTupleGetUpdateXid(tup) \ : \ - HeapTupleHeaderGetRawXmax(tup) \ + HeapTupleGetRawXmax(tup) \ ) -#define HeapTupleHeaderGetRawXmax(tup) \ -( \ - (tup)->t_choice.t_heap.t_xmax \ -) +#define HeapTupleGetRawXmax(tup) ((tup)->t_xmax) -#define HeapTupleHeaderSetXmax(tup, xid) \ +#define HeapTupleHeaderGetRawXmax(page, tup) \ ( \ - (tup)->t_choice.t_heap.t_xmax = (xid) \ + HeapPageIsDoubleXmax(page) ? 
\ + HeapTupleHeaderGetDoubleXmax(tup) : \ + ShortTransactionIdToNormal( \ + ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) ? HeapPageGetSpecial(page)->pd_multi_base : HeapPageGetSpecial(page)->pd_xid_base, \ + (tup)->t_choice.t_heap.t_xmax) \ ) +#define HeapTupleSetXmax(tup, xid) \ +do { \ + (tup)->t_xmax = (xid); \ +} while (0) + +#define HeapTupleHeaderSetXmax(page, tup) \ +do { \ + if (HeapPageIsDoubleXmax(page)) \ + HeapTupleHeaderSetDoubleXmax((tup)->t_data, (tup)->t_xmax); \ + else \ + (tup)->t_data->t_choice.t_heap.t_xmax = \ + NormalTransactionIdToShort( \ + ((tup)->t_data->t_infomask & HEAP_XMAX_IS_MULTI) ? HeapPageGetSpecial(page)->pd_multi_base : HeapPageGetSpecial(page)->pd_xid_base, \ + ((tup)->t_xmax)); \ +} while (0) + /* * HeapTupleHeaderGetRawCommandId will give you what's in the header whether - * it is useful or not. Most code should use HeapTupleHeaderGetCmin or - * HeapTupleHeaderGetCmax instead, but note that those Assert that you can + * it is useful or not. Most code should use HeapTupleGetCmin or + * HeapTupleGetCmax instead, but note that those Assert that you can * get a legitimate result, ie you are in the originating transaction! */ #define HeapTupleHeaderGetRawCommandId(tup) \ @@ -555,8 +612,16 @@ do { \ * an otherwise-empty page can indeed hold a tuple of this size. Because * ItemIds and tuples have different alignment requirements, don't assume that * you can, say, fit 2 tuples of size MaxHeapTupleSize/2 on the same page. + * + * On shift to 64-bit XIDs MaxHeapTupleSize decreased by sizeof(HeapPageSpecialData). + * Extant tuples with length over new MaxHeapTupleSize are inherited on DoubleXmax + * pages. They could be read, but can not be updated unless their length decreases + * to fit MaxHeapTupleSize. Vacuum full will also copy these double xmax pages + * without change. 
*/ -#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData))) + +#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData)) - MAXALIGN(sizeof(HeapPageSpecialData))) +#define MaxHeapTupleSize_32 (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData))) #define MinHeapTupleSize MAXALIGN(SizeofHeapTupleHeader) /* @@ -690,6 +755,48 @@ struct MinimalTupleData #define HeapTupleClearHeapOnly(tuple) \ HeapTupleHeaderClearHeapOnly((tuple)->t_data) +/* + * Copy base values for xid and multixacts from page to heap tuple. Should be + * called each time tuple is read from page. Otherwise, it would be impossible + * to correctly read tuple xmin and xmax. + */ +static inline void +HeapTupleCopyBaseFromPage(HeapTuple tup, void *page) +{ + TransactionId base; + HeapTupleHeader tup_hdr = tup->t_data; + + if (HeapPageIsDoubleXmax(page)) + { + tup->t_xmin = FrozenTransactionId; + tup->t_xmax = HeapTupleHeaderGetDoubleXmax(tup_hdr); + } + else + { + if (HeapTupleHeaderXminFrozen(tup_hdr)) + tup->t_xmin = FrozenTransactionId; + else if (TransactionIdIsNormal(tup_hdr->t_choice.t_heap.t_xmin)) + { + base = HeapPageGetSpecial(page)->pd_xid_base; + tup->t_xmin = ShortTransactionIdToNormal(base, + tup_hdr->t_choice.t_heap.t_xmin); + } + else + tup->t_xmin = (TransactionId) tup_hdr->t_choice.t_heap.t_xmin; + + if (TransactionIdIsNormal(tup_hdr->t_choice.t_heap.t_xmax)) + { + base = (tup_hdr->t_infomask & HEAP_XMAX_IS_MULTI) ? 
+ HeapPageGetSpecial(page)->pd_multi_base : + HeapPageGetSpecial(page)->pd_xid_base; + tup->t_xmax = ShortTransactionIdToNormal(base, + tup_hdr->t_choice.t_heap.t_xmax); + } + else + tup->t_xmax = (TransactionId) tup_hdr->t_choice.t_heap.t_xmax; + } +} + /* prototypes for functions in common/heaptuple.c */ extern Size heap_compute_data_size(TupleDesc tupleDesc, Datum *values, bool *isnull); diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index a5600a320ae..a325693b3d3 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -18,16 +18,16 @@ /* * The first two MultiXactId values are reserved to store the truncation Xid - * and epoch of the first segment, so we start assigning multixact values from + * and base of the first segment, so we start assigning multixact values from * 2. */ -#define InvalidMultiXactId ((MultiXactId) 0) -#define FirstMultiXactId ((MultiXactId) 1) -#define MaxMultiXactId ((MultiXactId) 0xFFFFFFFF) +#define InvalidMultiXactId UINT64CONST(0) +#define FirstMultiXactId UINT64CONST(1) +#define MaxMultiXactId UINT64CONST(0xFFFFFFFFFFFFFFFF) #define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId) -#define MaxMultiXactOffset ((MultiXactOffset) 0xFFFFFFFF) +#define MaxMultiXactOffset UINT64CONST(0xFFFFFFFFFFFFFFFF) /* Number of SLRU buffers to use for multixact */ #define NUM_MULTIXACTOFFSET_BUFFERS 8 @@ -146,7 +146,6 @@ extern void MultiXactSetNextMXact(MultiXactId nextMulti, extern void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset); extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB); -extern int MultiXactMemberFreezeThreshold(void); extern void multixact_twophase_recover(TransactionId xid, uint16 info, void *recdata, uint32 len); diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 93f8267b483..dd229803ef1 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -62,8 +62,10 @@ 
typedef uint16 BTCycleId; typedef struct BTPageOpaqueData { BlockNumber btpo_prev; /* left sibling, or P_NONE if leftmost */ + /* ... or next transaction ID (lower part) */ BlockNumber btpo_next; /* right sibling, or P_NONE if rightmost */ uint32 btpo_level; /* tree level --- zero for leaf pages */ + /* ... or next transaction ID (lower part) */ uint16 btpo_flags; /* flag bits, see below */ BTCycleId btpo_cycleid; /* vacuum cycle ID of latest split */ } BTPageOpaqueData; @@ -92,6 +94,14 @@ typedef BTPageOpaqueData *BTPageOpaque; */ #define MAX_BT_CYCLE_ID 0xFF7F +/* Macros for access xact */ +#define BTP_GET_XACT(opaque) (((uint64) ((BTPageOpaque) opaque)->btpo_prev << 32) | \ + (uint64) ((BTPageOpaque) opaque)->btpo_level) +#define BTP_SET_XACT(opaque, xact) \ +do { \ + ((BTPageOpaque) opaque)->btpo_prev = (uint32) (xact >> 32); \ + ((BTPageOpaque) opaque)->btpo_level = (uint32) xact; \ +} while (0) /* * The Meta page is always the first page in the btree index. diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h index 3e27790b3f0..4b900279b78 100644 --- a/src/include/access/rewriteheap.h +++ b/src/include/access/rewriteheap.h @@ -51,7 +51,7 @@ typedef struct LogicalRewriteMappingData * 6) xid of the xact performing the mapping * --- */ -#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x-%x" -extern void CheckPointLogicalRewriteHeap(void); +#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x_%x-%x_%x" +extern void CheckPointLogicalRewriteHeap(void); #endif /* REWRITE_HEAP_H */ diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h index 9a74721c97c..bf748ea6cba 100644 --- a/src/include/access/rmgrlist.h +++ b/src/include/access/rmgrlist.h @@ -47,3 +47,4 @@ PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_i PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL, NULL, NULL) PG_RMGR(RM_GENERIC_ID, "Generic", generic_redo, 
generic_desc, generic_identify, NULL, NULL, generic_mask, NULL) PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL, NULL, logicalmsg_decode) +PG_RMGR(RM_HEAP3_ID, "Heap3", heap3_redo, heap3_desc, heap3_identify, NULL, NULL, heap_mask, NULL) diff --git a/src/include/access/slru.h b/src/include/access/slru.h index 4f5a324da2a..767854419dc 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -21,15 +21,7 @@ /* * Define SLRU segment size. A page is the same BLCKSZ as is used everywhere * else in Postgres. The segment size can be chosen somewhat arbitrarily; - * we make it 32 pages by default, or 256Kb, i.e. 1M transactions for CLOG - * or 64K transactions for SUBTRANS. - * - * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF, - * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where - * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at - * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need - * take no explicit notice of that fact in slru.c, except when comparing - * segment and page numbers in SimpleLruTruncate (see PagePrecedes()). + * we make it 32 pages by default. */ #define SLRU_PAGES_PER_SEGMENT 32 diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index fe869c6c184..f4fd0433411 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -118,7 +118,7 @@ typedef enum TM_Result * cmax is the outdating command's CID, but only when the failure code is * TM_SelfModified (i.e., something in the current transaction outdated the * tuple); otherwise cmax is zero. (We make this restriction because - * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other + * HeapTupleGetCmax doesn't work for tuples outdated in other * transactions.) 
*/ typedef struct TM_FailureData diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 338dfca5a0b..f2c49ef41df 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -17,6 +17,10 @@ #include "access/xlogdefs.h" +#ifndef FRONTEND +#include "utils/elog.h" +#endif + /* ---------------- * Special transaction ID values * @@ -28,11 +32,12 @@ * Note: if you need to change it, you must change pg_class.h as well. * ---------------- */ -#define InvalidTransactionId ((TransactionId) 0) -#define BootstrapTransactionId ((TransactionId) 1) -#define FrozenTransactionId ((TransactionId) 2) -#define FirstNormalTransactionId ((TransactionId) 3) -#define MaxTransactionId ((TransactionId) 0xFFFFFFFF) +#define InvalidTransactionId UINT64CONST(0) +#define BootstrapTransactionId UINT64CONST(1) +#define FrozenTransactionId UINT64CONST(2) +#define FirstNormalTransactionId UINT64CONST(3) +#define MaxTransactionId UINT64CONST(0xFFFFFFFFFFFFFFFF) +#define MaxShortTransactionId ((TransactionId) 0x7FFFFFFF) /* ---------------- * transaction ID manipulation macros @@ -44,17 +49,48 @@ #define TransactionIdStore(xid, dest) (*(dest) = (xid)) #define StoreInvalidTransactionId(dest) (*(dest) = InvalidTransactionId) -#define EpochFromFullTransactionId(x) ((uint32) ((x).value >> 32)) -#define XidFromFullTransactionId(x) ((uint32) (x).value) -#define U64FromFullTransactionId(x) ((x).value) +/* + * Convert short xid from/to full xid. Assertion should fail if the full xid + * doesn't fit the xid base.
+ */ +static inline TransactionId +ShortTransactionIdToNormal(TransactionId base, ShortTransactionId xid) +{ + if (!TransactionIdIsNormal(xid)) + return (TransactionId) xid; + +#ifndef FRONTEND + /* xid + base should not overflow TransactionId */ + Assert(xid + base >= base); +#endif + + return (TransactionId) (xid + base); +} + +static inline ShortTransactionId +NormalTransactionIdToShort(TransactionId base, TransactionId xid) +{ + if (!TransactionIdIsNormal(xid)) + return (ShortTransactionId) (xid); + +#ifndef FRONTEND + /* xid should fit ShortTransactionId */ + Assert(xid >= base + FirstNormalTransactionId && + xid <= base + MaxShortTransactionId); +#endif + + return (ShortTransactionId) (xid - base); +} + +#define XidFromFullTransactionId(x) ((x).value) #define FullTransactionIdEquals(a, b) ((a).value == (b).value) #define FullTransactionIdPrecedes(a, b) ((a).value < (b).value) #define FullTransactionIdPrecedesOrEquals(a, b) ((a).value <= (b).value) #define FullTransactionIdFollows(a, b) ((a).value > (b).value) #define FullTransactionIdFollowsOrEquals(a, b) ((a).value >= (b).value) #define FullTransactionIdIsValid(x) TransactionIdIsValid(XidFromFullTransactionId(x)) -#define InvalidFullTransactionId FullTransactionIdFromEpochAndXid(0, InvalidTransactionId) -#define FirstNormalFullTransactionId FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId) +#define InvalidFullTransactionId FullTransactionIdFromXid(InvalidTransactionId) +#define FirstNormalFullTransactionId FullTransactionIdFromXid(FirstNormalTransactionId) #define FullTransactionIdIsNormal(x) FullTransactionIdFollowsOrEquals(x, FirstNormalFullTransactionId) /* @@ -68,21 +104,11 @@ typedef struct FullTransactionId } FullTransactionId; static inline FullTransactionId -FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid) -{ - FullTransactionId result; - - result.value = ((uint64) epoch) << 32 | xid; - - return result; -} - -static inline FullTransactionId 
-FullTransactionIdFromU64(uint64 value) +FullTransactionIdFromXid(TransactionId xid) { FullTransactionId result; - result.value = value; + result.value = xid; return result; } @@ -91,8 +117,7 @@ FullTransactionIdFromU64(uint64 value) #define TransactionIdAdvance(dest) \ do { \ (dest)++; \ - if ((dest) < FirstNormalTransactionId) \ - (dest) = FirstNormalTransactionId; \ + Assert(TransactionIdIsNormal(dest)); \ } while(0) /* @@ -140,18 +165,19 @@ FullTransactionIdAdvance(FullTransactionId *dest) /* back up a transaction ID variable, handling wraparound correctly */ #define TransactionIdRetreat(dest) \ do { \ + Assert(TransactionIdIsNormal(dest)); \ (dest)--; \ - } while ((dest) < FirstNormalTransactionId) + } while(0) /* compare two XIDs already known to be normal; this is a macro for speed */ #define NormalTransactionIdPrecedes(id1, id2) \ (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ - (int32) ((id1) - (id2)) < 0) + (int64) ((id1) - (id2)) < 0) /* compare two XIDs already known to be normal; this is a macro for speed */ #define NormalTransactionIdFollows(id1, id2) \ (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ - (int32) ((id1) - (id2)) > 0) + (int64) ((id1) - (id2)) > 0) /* ---------- * Object ID (OID) zero is InvalidOid. 
@@ -221,9 +247,6 @@ typedef struct VariableCacheData TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */ TransactionId xidVacLimit; /* start forcing autovacuums here */ - TransactionId xidWarnLimit; /* start complaining here */ - TransactionId xidStopLimit; /* refuse to advance nextXid beyond here */ - TransactionId xidWrapLimit; /* where the world ends */ Oid oldestXidDB; /* database with minimum datfrozenxid */ /* @@ -277,10 +300,6 @@ extern bool TransactionIdIsKnownCompleted(TransactionId transactionId); extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids); extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn); extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids); -extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2); -extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2); -extern bool TransactionIdFollows(TransactionId id1, TransactionId id2); -extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2); extern TransactionId TransactionIdLatest(TransactionId mainxid, int nxids, const TransactionId *xids); extern XLogRecPtr TransactionIdGetCommitLSN(TransactionId xid); diff --git a/src/include/access/tupmacs.h b/src/include/access/tupmacs.h index 16c74a581e4..6fe837f4d18 100644 --- a/src/include/access/tupmacs.h +++ b/src/include/access/tupmacs.h @@ -150,10 +150,11 @@ ((attalign) == TYPALIGN_INT) ? INTALIGN(cur_offset) : \ (((attalign) == TYPALIGN_CHAR) ? (uintptr_t) (cur_offset) : \ (((attalign) == TYPALIGN_DOUBLE) ? DOUBLEALIGN(cur_offset) : \ + (((attalign) == TYPALIGN_XID) ? 
MAXALIGN(cur_offset) : \ ( \ AssertMacro((attalign) == TYPALIGN_SHORT), \ SHORTALIGN(cur_offset) \ - ))) \ + )))) \ ) /* diff --git a/src/include/access/xact.h b/src/include/access/xact.h index 4794941df31..f8a370043f6 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -236,7 +236,7 @@ typedef struct xl_xact_xinfo * Commit records can be large, so copying large portions isn't * attractive. */ - uint32 xinfo; + uint64 xinfo; } xl_xact_xinfo; typedef struct xl_xact_dbinfo @@ -289,7 +289,12 @@ typedef struct xl_xact_invals typedef struct xl_xact_twophase { - TransactionId xid; + /* + * TransactionId is split into 32-bit parts because xl_xact_twophase is + * only int-aligned. + */ + uint32 xid_lo; + uint32 xid_hi; } xl_xact_twophase; typedef struct xl_xact_origin @@ -308,7 +313,7 @@ typedef struct xl_xact_commit /* xl_xact_relfilenodes follows if XINFO_HAS_RELFILENODES */ /* xl_xact_stats_items follows if XINFO_HAS_DROPPED_STATS */ /* xl_xact_invals follows if XINFO_HAS_INVALS */ - /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ + /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE (xid is int-aligned!) */ /* twophase_gid follows if XINFO_HAS_GID. As a null-terminated string. */ /* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! */ } xl_xact_commit; @@ -324,7 +329,7 @@ typedef struct xl_xact_abort /* xl_xact_relfilenodes follows if XINFO_HAS_RELFILENODES */ /* xl_xact_stats_items follows if XINFO_HAS_DROPPED_STATS */ /* No invalidation messages needed. */ - /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ + /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE (xid is int-aligned!) */ /* twophase_gid follows if XINFO_HAS_GID. As a null-terminated string. */ /* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! 
*/ } xl_xact_abort; diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h index 5fc340c434b..8fca607fa33 100644 --- a/src/include/access/xloginsert.h +++ b/src/include/access/xloginsert.h @@ -37,6 +37,7 @@ * will be skipped) */ #define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image * is taken */ +#define REGBUF_CONVERTED 0x20 /* buffer had format conversion */ /* prototypes for public functions in xloginsert.c: */ extern void XLogBeginInsert(void); diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index e73ea4a8408..3320046ab81 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -423,10 +423,6 @@ extern bool DecodeXLogRecord(XLogReaderState *state, #define XLogRecBlockImageApply(decoder, block_id) \ ((decoder)->record->blocks[block_id].apply_image) -#ifndef FRONTEND -extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record); -#endif - extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page); extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len); extern void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h index 052ac6817a6..a680cd6b8a8 100644 --- a/src/include/access/xlogrecord.h +++ b/src/include/access/xlogrecord.h @@ -41,18 +41,17 @@ typedef struct XLogRecord { uint32 xl_tot_len; /* total len of entire record */ + pg_crc32c xl_crc; /* CRC for this record */ TransactionId xl_xid; /* xact id */ XLogRecPtr xl_prev; /* ptr to previous record in log */ uint8 xl_info; /* flag bits, see below */ RmgrId xl_rmid; /* resource manager for this record */ - /* 2 bytes of padding here, initialize to zero */ - pg_crc32c xl_crc; /* CRC for this record */ /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */ } XLogRecord; -#define SizeOfXLogRecord (offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32c)) 
+#define SizeOfXLogRecord (offsetof(XLogRecord, xl_rmid) + sizeof(RmgrId)) /* * The high 4 bits in xl_info may be used freely by rmgr. The diff --git a/src/include/c.h b/src/include/c.h index 4f16e589b3e..dacb0695612 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -75,6 +75,10 @@ #include #endif +#if HAVE_INTTYPES_H +#include "inttypes.h" +#endif + /* ---------------------------------------------------------------- * Section 1: compiler characteristics @@ -584,19 +588,29 @@ typedef double float8; typedef Oid regproc; typedef regproc RegProcedure; -typedef uint32 TransactionId; +typedef uint64 TransactionId; -typedef uint32 LocalTransactionId; +extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2); +extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2); +extern bool TransactionIdFollows(TransactionId id1, TransactionId id2); +extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2); -typedef uint32 SubTransactionId; +typedef uint32 ShortTransactionId; +typedef uint64 LocalTransactionId; +typedef uint64 SubTransactionId; -#define InvalidSubTransactionId ((SubTransactionId) 0) -#define TopSubTransactionId ((SubTransactionId) 1) +#define InvalidSubTransactionId ((SubTransactionId) 0) +#define TopSubTransactionId ((SubTransactionId) 1) /* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */ typedef TransactionId MultiXactId; -typedef uint32 MultiXactOffset; +typedef uint64 MultiXactOffset; + +#define MAX_START_XID UINT64CONST(0x3FFFFFFFFFFFFFFF) /* 2^62 - 1 */ +#define StartTransactionIdIsValid(xid) ((xid) <= MAX_START_XID) +#define StartMultiXactIdIsValid(mxid) ((mxid) <= MAX_START_XID) +#define StartMultiXactOffsetIsValid(mxoff) ((mxoff) <= MAX_START_XID) typedef uint32 CommandId; @@ -781,7 +795,6 @@ typedef NameData *Name; /* we don't currently need wider versions of the other ALIGN macros */ #define MAXALIGN64(LEN) TYPEALIGN64(MAXIMUM_ALIGNOF, (LEN)) - /* 
---------------------------------------------------------------- * Section 6: assertions * ---------------------------------------------------------------- diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat index 4cc129bebd8..4f209776350 100644 --- a/src/include/catalog/pg_amproc.dat +++ b/src/include/catalog/pg_amproc.dat @@ -403,9 +403,9 @@ amprocrighttype => 'bytea', amprocnum => '2', amproc => 'hashvarlenaextended' }, { amprocfamily => 'hash/xid_ops', amproclefttype => 'xid', - amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashint4' }, + amprocrighttype => 'xid', amprocnum => '1', amproc => 'hashint8' }, { amprocfamily => 'hash/xid_ops', amproclefttype => 'xid', - amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashint4extended' }, + amprocrighttype => 'xid', amprocnum => '2', amproc => 'hashint8extended' }, { amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8', amprocrighttype => 'xid8', amprocnum => '1', amproc => 'hashint8' }, { amprocfamily => 'hash/xid8_ops', amproclefttype => 'xid8', diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 06368e23667..ab02a0896f6 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -247,4 +247,10 @@ typedef struct ControlFileData */ #define PG_CONTROL_FILE_SIZE 8192 +#define CONTROLFILE_GET_OLDEDITION(control) \ + ((control)->pg_old_version >> 16) + +#define CONTROLFILE_SET_OLDEDITION(control, v) \ + (control)->pg_old_version = ((v) << 16) + #endif /* PG_CONTROL_H */ diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat index bc5f8213f3a..912ab48ecf4 100644 --- a/src/include/catalog/pg_operator.dat +++ b/src/include/catalog/pg_operator.dat @@ -183,16 +183,16 @@ oprresult => 'bool', oprcom => '=(xid,xid)', oprnegate => '<>(xid,xid)', oprcode => 'xideq', oprrest => 'eqsel', oprjoin => 'eqjoinsel' }, { oid => '353', descr => 'equal', - oprname => '=', oprleft => 'xid', 
oprright => 'int4', oprresult => 'bool', - oprnegate => '<>(xid,int4)', oprcode => 'xideqint4', oprrest => 'eqsel', + oprname => '=', oprleft => 'xid', oprright => 'int8', oprresult => 'bool', + oprnegate => '<>(xid,int8)', oprcode => 'xideqint8', oprrest => 'eqsel', oprjoin => 'eqjoinsel' }, { oid => '3315', descr => 'not equal', oprname => '<>', oprleft => 'xid', oprright => 'xid', oprresult => 'bool', oprcom => '<>(xid,xid)', oprnegate => '=(xid,xid)', oprcode => 'xidneq', oprrest => 'neqsel', oprjoin => 'neqjoinsel' }, { oid => '3316', descr => 'not equal', - oprname => '<>', oprleft => 'xid', oprright => 'int4', oprresult => 'bool', - oprnegate => '=(xid,int4)', oprcode => 'xidneqint4', oprrest => 'neqsel', + oprname => '<>', oprleft => 'xid', oprright => 'int8', oprresult => 'bool', + oprnegate => '=(xid,int8)', oprcode => 'xidneqint8', oprrest => 'neqsel', oprjoin => 'neqjoinsel' }, { oid => '5068', descr => 'equal', oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'xid8', diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 87aa571a331..53eefc30d60 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -2366,10 +2366,10 @@ { oid => '1181', descr => 'age of a transaction ID, in transactions before current transaction', proname => 'age', provolatile => 's', proparallel => 'r', - prorettype => 'int4', proargtypes => 'xid', prosrc => 'xid_age' }, + prorettype => 'int8', proargtypes => 'xid', prosrc => 'xid_age' }, { oid => '3939', descr => 'age of a multi-transaction ID, in multi-transactions before current multi-transaction', - proname => 'mxid_age', provolatile => 's', prorettype => 'int4', + proname => 'mxid_age', provolatile => 's', prorettype => 'int8', proargtypes => 'xid', prosrc => 'mxid_age' }, { oid => '1188', @@ -2704,11 +2704,11 @@ prosrc => 'bpcharlen' }, { oid => '1319', - proname => 'xideqint4', proleakproof => 't', prorettype => 'bool', - proargtypes => 'xid int4', 
prosrc => 'xideq' }, + proname => 'xideqint8', proleakproof => 't', prorettype => 'bool', + proargtypes => 'xid int8', prosrc => 'xideq' }, { oid => '3309', - proname => 'xidneqint4', proleakproof => 't', prorettype => 'bool', - proargtypes => 'xid int4', prosrc => 'xidneq' }, + proname => 'xidneqint8', proleakproof => 't', prorettype => 'bool', + proargtypes => 'xid int8', prosrc => 'xidneq' }, { oid => '1326', proname => 'interval_div', prorettype => 'interval', diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat index df458794635..9ecd608aa9c 100644 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -95,9 +95,9 @@ typinput => 'tidin', typoutput => 'tidout', typreceive => 'tidrecv', typsend => 'tidsend', typalign => 's' }, { oid => '28', array_type_oid => '1011', descr => 'transaction id', - typname => 'xid', typlen => '4', typbyval => 't', typcategory => 'U', + typname => 'xid', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', typcategory => 'U', typinput => 'xidin', typoutput => 'xidout', typreceive => 'xidrecv', - typsend => 'xidsend', typalign => 'i' }, + typsend => 'xidsend', typalign => 'x' }, { oid => '29', array_type_oid => '1012', descr => 'command identifier type, sequence in transaction id', typname => 'cid', typlen => '4', typbyval => 't', typcategory => 'U', diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index 48a25591374..71f5f547f46 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -300,6 +300,11 @@ DECLARE_UNIQUE_INDEX(pg_type_typname_nsp_index, 2704, TypeNameNspIndexId, on pg_ #define TYPALIGN_SHORT 's' /* short alignment (typically 2 bytes) */ #define TYPALIGN_INT 'i' /* int alignment (typically 4 bytes) */ #define TYPALIGN_DOUBLE 'd' /* double alignment (often 8 bytes) */ +/* + * We need to use alignment suitable for 8-byte XID values. + * On systems like AIX, double alignment (4 bytes) is not enough. 
+ */ +#define TYPALIGN_XID 'x' #define TYPSTORAGE_PLAIN 'p' /* type not prepared for toasting */ #define TYPSTORAGE_EXTERNAL 'e' /* toastable, don't try to compress */ diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index f38e1148f97..134a0afca6c 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -213,12 +213,12 @@ typedef enum VacOptValue */ typedef struct VacuumParams { - bits32 options; /* bitmask of VACOPT_* */ - int freeze_min_age; /* min freeze age, -1 to use default */ - int freeze_table_age; /* age at which to scan whole table */ - int multixact_freeze_min_age; /* min multixact freeze age, -1 to + bits32 options; /* bitmask of VacuumOption */ + int64 freeze_min_age; /* min freeze age, -1 to use default */ + int64 freeze_table_age; /* age at which to scan whole table */ + int64 multixact_freeze_min_age; /* min multixact freeze age, -1 to * use default */ - int multixact_freeze_table_age; /* multixact age at which to scan + int64 multixact_freeze_table_age; /* multixact age at which to scan * whole table */ bool is_wraparound; /* force a for-wraparound vacuum */ int log_min_duration; /* minimum execution threshold in ms at @@ -252,12 +252,12 @@ typedef struct VacDeadItems /* GUC parameters */ extern PGDLLIMPORT int default_statistics_target; /* PGDLLIMPORT for PostGIS */ -extern PGDLLIMPORT int vacuum_freeze_min_age; -extern PGDLLIMPORT int vacuum_freeze_table_age; -extern PGDLLIMPORT int vacuum_multixact_freeze_min_age; -extern PGDLLIMPORT int vacuum_multixact_freeze_table_age; -extern PGDLLIMPORT int vacuum_failsafe_age; -extern PGDLLIMPORT int vacuum_multixact_failsafe_age; +extern PGDLLIMPORT int64 vacuum_freeze_min_age; +extern PGDLLIMPORT int64 vacuum_freeze_table_age; +extern PGDLLIMPORT int64 vacuum_multixact_freeze_min_age; +extern PGDLLIMPORT int64 vacuum_multixact_freeze_table_age; +extern PGDLLIMPORT int64 vacuum_failsafe_age; +extern PGDLLIMPORT int64 vacuum_multixact_failsafe_age; /* 
Variables for cost-based parallel vacuum */ extern PGDLLIMPORT pg_atomic_uint32 *VacuumSharedCostBalance; @@ -287,9 +287,9 @@ extern void vac_update_relstats(Relation relation, bool *minmulti_updated, bool in_outer_xact); extern bool vacuum_set_xid_limits(Relation rel, - int freeze_min_age, int freeze_table_age, - int multixact_freeze_min_age, - int multixact_freeze_table_age, + int64 freeze_min_age, int64 freeze_table_age, + int64 multixact_freeze_min_age, + int64 multixact_freeze_table_age, TransactionId *oldestXmin, MultiXactId *oldestMxact, TransactionId *freezeLimit, diff --git a/src/include/fmgr.h b/src/include/fmgr.h index 5314b737052..be577c0b47c 100644 --- a/src/include/fmgr.h +++ b/src/include/fmgr.h @@ -281,6 +281,7 @@ extern struct varlena *pg_detoast_datum_packed(struct varlena *datum); #define PG_GETARG_FLOAT4(n) DatumGetFloat4(PG_GETARG_DATUM(n)) #define PG_GETARG_FLOAT8(n) DatumGetFloat8(PG_GETARG_DATUM(n)) #define PG_GETARG_INT64(n) DatumGetInt64(PG_GETARG_DATUM(n)) +#define PG_GETARG_TRANSACTIONID(n) DatumGetTransactionId(PG_GETARG_DATUM(n)) /* use this if you want the raw, possibly-toasted input datum: */ #define PG_GETARG_RAW_VARLENA_P(n) ((struct varlena *) PG_GETARG_POINTER(n)) /* use this if you want the input datum de-toasted: */ @@ -367,6 +368,7 @@ extern struct varlena *pg_detoast_datum_packed(struct varlena *datum); #define PG_RETURN_FLOAT8(x) return Float8GetDatum(x) #define PG_RETURN_INT64(x) return Int64GetDatum(x) #define PG_RETURN_UINT64(x) return UInt64GetDatum(x) +#define PG_RETURN_TRANSACTIONID(x) return TransactionIdGetDatum(x) /* RETURN macros for other pass-by-ref types will typically look like this: */ #define PG_RETURN_BYTEA_P(x) PG_RETURN_POINTER(x) #define PG_RETURN_TEXT_P(x) PG_RETURN_POINTER(x) diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index b3b407579b0..ede294d7b6e 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -316,6 +316,7 @@ typedef enum NodeTag */ T_List, T_IntList, 
+ T_Int64List, T_OidList, /* diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index 2cb9d1371d9..3b98d254b5e 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -44,6 +44,7 @@ typedef union ListCell { void *ptr_value; int int_value; + int64 int64_value; Oid oid_value; } ListCell; @@ -168,6 +169,7 @@ list_length(const List *l) */ #define lfirst(lc) ((lc)->ptr_value) #define lfirst_int(lc) ((lc)->int_value) +#define lfirst_int64(lc) ((lc)->int64_value) #define lfirst_oid(lc) ((lc)->oid_value) #define lfirst_node(type,lc) castNode(type, lfirst(lc)) @@ -193,6 +195,7 @@ list_length(const List *l) #define llast(l) lfirst(list_last_cell(l)) #define llast_int(l) lfirst_int(list_last_cell(l)) +#define llast_int64(l) lfirst_int64(list_last_cell(l)) #define llast_oid(l) lfirst_oid(list_last_cell(l)) #define llast_node(type,l) castNode(type, llast(l)) @@ -538,6 +541,7 @@ extern List *list_make5_impl(NodeTag t, ListCell datum1, ListCell datum2, extern pg_nodiscard List *lappend(List *list, void *datum); extern pg_nodiscard List *lappend_int(List *list, int datum); +extern pg_nodiscard List *lappend_int64(List *list, int64 datum); extern pg_nodiscard List *lappend_oid(List *list, Oid datum); extern pg_nodiscard List *list_insert_nth(List *list, int pos, void *datum); diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index cdd742cb55b..eade86e00da 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -976,6 +976,9 @@ # endif #endif +/* Postgres Pro use 64bit xids */ +#undef XID_IS_64BIT + /* Size of a WAL file block. This need have no particular relation to BLCKSZ. 
XLOG_BLCKSZ must be a power of 2, and if your system supports O_DIRECT I/O, XLOG_BLCKSZ must be a multiple of the alignment requirement for direct-I/O diff --git a/src/include/postgres.h b/src/include/postgres.h index 31358110dca..e8b4b1f9dfb 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -555,21 +555,21 @@ typedef struct NullableDatum * Returns transaction identifier value of a datum. */ -#define DatumGetTransactionId(X) ((TransactionId) (X)) +#define DatumGetTransactionId(X) (DatumGetUInt64(X)) /* * TransactionIdGetDatum * Returns datum representation for a transaction identifier. */ -#define TransactionIdGetDatum(X) ((Datum) (X)) +#define TransactionIdGetDatum(X) (UInt64GetDatum(X)) /* * MultiXactIdGetDatum * Returns datum representation for a multixact identifier. */ -#define MultiXactIdGetDatum(X) ((Datum) (X)) +#define MultiXactIdGetDatum(X) (UInt64GetDatum(X)) /* * DatumGetCommandId diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h index 9d40fd6d54b..03024361ea1 100644 --- a/src/include/postmaster/autovacuum.h +++ b/src/include/postmaster/autovacuum.h @@ -37,8 +37,8 @@ extern PGDLLIMPORT int autovacuum_vac_ins_thresh; extern PGDLLIMPORT double autovacuum_vac_ins_scale; extern PGDLLIMPORT int autovacuum_anl_thresh; extern PGDLLIMPORT double autovacuum_anl_scale; -extern PGDLLIMPORT int autovacuum_freeze_max_age; -extern PGDLLIMPORT int autovacuum_multixact_freeze_max_age; +extern PGDLLIMPORT int64 autovacuum_freeze_max_age; +extern PGDLLIMPORT int64 autovacuum_multixact_freeze_max_age; extern PGDLLIMPORT double autovacuum_vac_cost_delay; extern PGDLLIMPORT int autovacuum_vac_cost_limit; diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index a17e7b28a53..6e373e6ef06 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -40,10 +40,10 @@ */ #define BUF_REFCOUNT_ONE 1 #define BUF_REFCOUNT_MASK ((1U << 18) - 1) -#define 
BUF_USAGECOUNT_MASK 0x003C0000U +#define BUF_USAGECOUNT_MASK 0x001C0000U #define BUF_USAGECOUNT_ONE (1U << 18) #define BUF_USAGECOUNT_SHIFT 18 -#define BUF_FLAG_MASK 0xFFC00000U +#define BUF_FLAG_MASK 0xFFE00000U /* Get refcount and usagecount from buffer state */ #define BUF_STATE_GET_REFCOUNT(state) ((state) & BUF_REFCOUNT_MASK) @@ -55,6 +55,7 @@ * Note: BM_TAG_VALID essentially means that there is a buffer hashtable * entry associated with the buffer's tag. */ +#define BM_CONVERTED (1U << 21) /* buffer was converted to 64-bit xid */ #define BM_LOCKED (1U << 22) /* buffer header is locked */ #define BM_DIRTY (1U << 23) /* data needs writing */ #define BM_VALID (1U << 24) /* data is valid */ diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 58391406f65..533b6a9009b 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -227,6 +227,8 @@ extern void BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum); extern void MarkBufferDirtyHint(Buffer buffer, bool buffer_std); +extern void MarkBufferConverted(Buffer buffer, bool converted); +extern bool IsBufferConverted(Buffer buffer); extern void UnlockBuffers(void); extern void LockBuffer(Buffer buffer, int mode); @@ -249,6 +251,8 @@ extern void TestForOldSnapshot_impl(Snapshot snapshot, Relation relation); extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype); extern void FreeAccessStrategy(BufferAccessStrategy strategy); +/* old tuple format support */ +extern void convert_page(Relation rel, Page orig_page, Buffer buf, BlockNumber blkno); /* inline functions */ diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index e9f253f2c8a..5319fe6c098 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -14,6 +14,7 @@ #ifndef BUFPAGE_H #define BUFPAGE_H +#include "access/transam.h" #include "access/xlogdefs.h" #include "storage/block.h" #include "storage/item.h" @@ 
-159,12 +160,110 @@ typedef struct PageHeaderData LocationIndex pd_upper; /* offset to end of free space */ LocationIndex pd_special; /* offset to start of special space */ uint16 pd_pagesize_version; - TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */ + ShortTransactionId pd_prune_xid; /* oldest prunable XID, or zero if + * none */ ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */ } PageHeaderData; typedef PageHeaderData *PageHeader; + +/* + * HeapPageSpecialData -- data that stored at the end of each heap page. + * + * pd_xid_base - base value for transaction IDs on page + * pd_multi_base - base value for multixact IDs on page + * + * pd_xid_base and pd_multi_base are base values for calculation of transaction + * identifiers from t_xmin and t_xmax in each heap tuple header on the page. + */ +typedef struct HeapPageSpecialData +{ + TransactionId pd_xid_base; /* base value for transaction IDs on page */ + TransactionId pd_multi_base; /* base value for multixact IDs on page */ +} HeapPageSpecialData; + +typedef HeapPageSpecialData * HeapPageSpecial; + +extern PGDLLIMPORT HeapPageSpecial doubleXmaxSpecial; + +/* + * Get pointer to HeapPageSpecialData without using pd_special of the page + * (for the sake of speed) assuming all heap pages have same size of special + * data. + * + * Return doubleXmaxSpecial when pd_special == BLCKSZ. See comment in bufpage.c + * for details. + */ +#define HeapPageGetSpecial(page) ( \ + (((PageHeader) (page))->pd_special == BLCKSZ) ? \ + ((HeapPageSpecial) doubleXmaxSpecial) : \ + (AssertMacro(((PageHeader) (page))->pd_special == BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData))), \ + (HeapPageSpecial) ((Pointer) (page) + BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData)))) \ +) + +/* + * Version of HeapPageGetSpecial() without assertions about pd_special. Used + * for non-consistent reads from non-locked pages. 
+ */ +#define HeapPageGetSpecialNoAssert(page) ( \ + (((PageHeader) (page))->pd_special == BLCKSZ) ? \ + ((HeapPageSpecial) doubleXmaxSpecial) : \ + (HeapPageSpecial) ((Pointer) (page) + BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData))) \ +) + +/* Check if page is in "double xmax" format */ +#define HeapPageIsDoubleXmax(page) \ + (((PageHeader) (page))->pd_special == BLCKSZ) + +/* + * Set pd_prune_xid. + */ +static inline void +HeapPageSetPruneXid(Page page, TransactionId xid) +{ + if (HeapPageIsDoubleXmax(page)) + return; + + ((PageHeader) (page))->pd_prune_xid = + NormalTransactionIdToShort(HeapPageGetSpecial(page)->pd_xid_base, (xid)); + + Assert(((PageHeader) (page))->pd_prune_xid <= MaxShortTransactionId); +} + +/* + * Get pd_prune_xid from locked page. + */ +static inline TransactionId +HeapPageGetPruneXid(Page page) +{ + if (HeapPageIsDoubleXmax(page)) + return ((PageHeader) (page))->pd_prune_xid; + + return ShortTransactionIdToNormal(HeapPageGetSpecial(page)->pd_xid_base, + ((PageHeader) (page))->pd_prune_xid); +} + +/* + * Get pd_prune_xid from non-locked page. May return invalid value, but doesn't + * cause assert failures. + */ +static inline TransactionId +HeapPageGetPruneXidNoAssert(Page page) +{ + if (HeapPageIsDoubleXmax(page)) + return ((PageHeader) (page))->pd_prune_xid; + + return ShortTransactionIdToNormal(HeapPageGetSpecialNoAssert(page)->pd_xid_base, + ((PageHeader) (page))->pd_prune_xid); +} + +#define XidFitsPage(page, xid) \ +( \ + (xid) >= HeapPageGetSpecial(page)->pd_xid_base + FirstNormalTransactionId && \ + (xid) <= HeapPageGetSpecial(page)->pd_xid_base + MaxShortTransactionId \ +) + /* * pd_flags contains the following flag bits. Undefined bits are initialized * to zero and may be used in the future. 
@@ -192,11 +291,13 @@ typedef PageHeaderData *PageHeader; * Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and * added the pd_flags field (by stealing some bits from pd_tli), * as well as adding the pd_prune_xid field (which enlarges the header). + * PgPro Enterprise 10 uses version number (0x00FF - 1), and should not + * collide with vanilla versions due to page conversion after pg_upgrade. * * As of Release 9.3, the checksum version must also be considered when * handling pages. */ -#define PG_PAGE_LAYOUT_VERSION 4 +#define PG_PAGE_LAYOUT_VERSION 5 #define PG_DATA_CHECKSUM_VERSION 1 /* ---------------------------------------------------------------- @@ -392,13 +493,11 @@ PageValidateSpecialPointer(Page page) #define PageSetPrunable(page, xid) \ do { \ Assert(TransactionIdIsNormal(xid)); \ - if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \ - TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \ - ((PageHeader) (page))->pd_prune_xid = (xid); \ + if (!HeapPageIsDoubleXmax(page) && \ + (!TransactionIdIsValid(HeapPageGetPruneXid(page)) || \ + TransactionIdPrecedes(xid, HeapPageGetPruneXid(page)))) \ + HeapPageSetPruneXid(page, xid); \ } while (0) -#define PageClearPrunable(page) \ - (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId) - /* ---------------------------------------------------------------- * extern declarations @@ -432,6 +531,20 @@ do { \ StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)), "BLCKSZ has to be a multiple of sizeof(size_t)"); +/* + * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete + */ +typedef struct ItemIdCompactData +{ + uint16 offsetindex; /* linp array index */ + int16 itemoff; /* page offset of item data */ + uint16 alignedlen; /* MAXALIGN(item data len) */ +} ItemIdCompactData; + +typedef ItemIdCompactData *ItemIdCompact; + +extern int itemoffcompare(const void *item1, const void *item2); + extern void PageInit(Page page, Size 
pageSize, Size specialSize); extern bool PageIsVerifiedExtended(Page page, BlockNumber blkno, int flags); extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size, diff --git a/src/include/storage/itemid.h b/src/include/storage/itemid.h index e33637ff21e..442a72d658d 100644 --- a/src/include/storage/itemid.h +++ b/src/include/storage/itemid.h @@ -78,6 +78,8 @@ typedef uint16 ItemLength; #define ItemIdGetRedirect(itemId) \ ((itemId)->lp_off) +#define ItemIdGetTupleEnd(itemId) \ + (MAXALIGN(ItemIdGetLength((itemId))) + ItemIdGetOffset((itemId))) /* * ItemIdIsValid * True iff item identifier is valid. diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index e4e1495b245..0e0cd79bb03 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -227,8 +227,8 @@ typedef struct LOCKTAG /* ID info for a transaction is its TransactionId */ #define SET_LOCKTAG_TRANSACTION(locktag,xid) \ - ((locktag).locktag_field1 = (xid), \ - (locktag).locktag_field2 = 0, \ + ((locktag).locktag_field1 = (uint32)((xid) & 0xFFFFFFFF), \ + (locktag).locktag_field2 = (uint32)((xid) >> 32), \ (locktag).locktag_field3 = 0, \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_TRANSACTION, \ @@ -237,8 +237,8 @@ typedef struct LOCKTAG /* ID info for a virtual transaction is its VirtualTransactionId */ #define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \ ((locktag).locktag_field1 = (vxid).backendId, \ - (locktag).locktag_field2 = (vxid).localTransactionId, \ - (locktag).locktag_field3 = 0, \ + (locktag).locktag_field2 = (uint32)((vxid).localTransactionId & 0xFFFFFFFF), \ + (locktag).locktag_field3 = (uint32)((vxid).localTransactionId >> 32), \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) @@ -248,9 +248,9 @@ typedef struct LOCKTAG * its speculative insert counter. 
*/ #define SET_LOCKTAG_SPECULATIVE_INSERTION(locktag,xid,token) \ - ((locktag).locktag_field1 = (xid), \ - (locktag).locktag_field2 = (token), \ - (locktag).locktag_field3 = 0, \ + ((locktag).locktag_field1 = (uint32)((xid) & 0xFFFFFFFF), \ + (locktag).locktag_field2 = (uint32)((xid) >> 32), \ + (locktag).locktag_field3 = (token), \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_SPECULATIVE_TOKEN, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h index 6a7763264b0..6858e24bc90 100644 --- a/src/include/storage/standby.h +++ b/src/include/storage/standby.h @@ -21,7 +21,7 @@ #include "storage/standbydefs.h" /* User-settable GUC parameters */ -extern PGDLLIMPORT int vacuum_defer_cleanup_age; +extern PGDLLIMPORT int64 vacuum_defer_cleanup_age; extern PGDLLIMPORT int max_standby_archive_delay; extern PGDLLIMPORT int max_standby_streaming_delay; extern PGDLLIMPORT bool log_recovery_conflict_waits; diff --git a/src/include/utils/combocid.h b/src/include/utils/combocid.h index 80fe6d2ceac..8465768b6f1 100644 --- a/src/include/utils/combocid.h +++ b/src/include/utils/combocid.h @@ -15,7 +15,7 @@ #define COMBOCID_H /* - * HeapTupleHeaderGetCmin and HeapTupleHeaderGetCmax function prototypes + * HeapTupleGetCmin and HeapTupleGetCmax function prototypes * are in access/htup.h, because that's where the macro definitions that * those functions replaced used to be. 
*/ diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 90b3c49bc12..7c4e34f87e3 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -296,12 +296,12 @@ typedef struct AutoVacOpts int vacuum_ins_threshold; int analyze_threshold; int vacuum_cost_limit; - int freeze_min_age; - int freeze_max_age; - int freeze_table_age; - int multixact_freeze_min_age; - int multixact_freeze_max_age; - int multixact_freeze_table_age; + int64 freeze_min_age; + int64 freeze_max_age; + int64 freeze_table_age; + int64 multixact_freeze_min_age; + int64 multixact_freeze_max_age; + int64 multixact_freeze_table_age; int log_min_duration; float8 vacuum_cost_delay; float8 vacuum_scale_factor; diff --git a/src/include/utils/xid8.h b/src/include/utils/xid8.h index b702fc1a910..284df5027b8 100644 --- a/src/include/utils/xid8.h +++ b/src/include/utils/xid8.h @@ -14,8 +14,8 @@ #include "access/transam.h" -#define DatumGetFullTransactionId(X) (FullTransactionIdFromU64(DatumGetUInt64(X))) -#define FullTransactionIdGetDatum(X) (UInt64GetDatum(U64FromFullTransactionId(X))) +#define DatumGetFullTransactionId(X) (FullTransactionIdFromXid(DatumGetUInt64(X))) +#define FullTransactionIdGetDatum(X) (UInt64GetDatum(XidFromFullTransactionId(X))) #define PG_GETARG_FULLTRANSACTIONID(X) DatumGetFullTransactionId(PG_GETARG_DATUM(X)) #define PG_RETURN_FULLTRANSACTIONID(X) return FullTransactionIdGetDatum(X) diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c index edb93ec1c4c..b29ef18645f 100644 --- a/src/pl/plperl/plperl.c +++ b/src/pl/plperl/plperl.c @@ -2667,7 +2667,7 @@ validate_plperl_function(plperl_proc_ptr *proc_ptr, HeapTuple procTup) * This is needed because CREATE OR REPLACE FUNCTION can modify the * function's pg_proc entry without changing its OID. 
************************************************************/ - uptodate = (prodesc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + uptodate = (prodesc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self)); if (uptodate) @@ -2791,7 +2791,7 @@ compile_plperl_function(Oid fn_oid, bool is_trigger, bool is_event_trigger) MemoryContextSetIdentifier(proc_cxt, prodesc->proname); prodesc->fn_cxt = proc_cxt; prodesc->fn_refcount = 0; - prodesc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + prodesc->fn_xmin = HeapTupleGetRawXmin(procTup); prodesc->fn_tid = procTup->t_self; prodesc->nargs = procStruct->pronargs; prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo)); diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index b791c23f066..820db4c0e47 100644 --- a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -171,7 +171,7 @@ recheck: if (function) { /* We have a compiled function, but is it still valid? 
*/ - if (function->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + if (function->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&function->fn_tid, &procTup->t_self)) function_valid = true; else @@ -348,7 +348,7 @@ do_compile(FunctionCallInfo fcinfo, function->fn_signature = format_procedure(fcinfo->flinfo->fn_oid); MemoryContextSetIdentifier(func_cxt, function->fn_signature); function->fn_oid = fcinfo->flinfo->fn_oid; - function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + function->fn_xmin = HeapTupleGetRawXmin(procTup); function->fn_tid = procTup->t_self; function->fn_input_collation = fcinfo->fncollation; function->fn_cxt = func_cxt; diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c index 7bd2a9fff1c..9cde92cd917 100644 --- a/src/pl/plpgsql/src/pl_exec.c +++ b/src/pl/plpgsql/src/pl_exec.c @@ -7376,6 +7376,7 @@ deconstruct_composite_datum(Datum value, HeapTupleData *tmptup) tmptup->t_len = HeapTupleHeaderGetDatumLength(td); ItemPointerSetInvalid(&(tmptup->t_self)); tmptup->t_tableOid = InvalidOid; + HeapTupleSetZeroBase(tmptup); tmptup->t_data = td; /* Extract rowtype info and find a tupdesc */ @@ -7550,6 +7551,7 @@ exec_move_row_from_datum(PLpgSQL_execstate *estate, tmptup.t_len = HeapTupleHeaderGetDatumLength(td); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroBase(&tmptup); tmptup.t_data = td; /* Extract rowtype info */ diff --git a/src/pl/plpython/plpy_procedure.c b/src/pl/plpython/plpy_procedure.c index 494f109b323..9884f74fa78 100644 --- a/src/pl/plpython/plpy_procedure.c +++ b/src/pl/plpython/plpy_procedure.c @@ -178,7 +178,7 @@ PLy_procedure_create(HeapTuple procTup, Oid fn_oid, bool is_trigger) proc->proname = pstrdup(NameStr(procStruct->proname)); MemoryContextSetIdentifier(cxt, proc->proname); proc->pyname = pstrdup(procName); - proc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + proc->fn_xmin = HeapTupleGetRawXmin(procTup); proc->fn_tid = 
procTup->t_self; proc->fn_readonly = (procStruct->provolatile != PROVOLATILE_VOLATILE); proc->is_setof = procStruct->proretset; @@ -419,7 +419,7 @@ PLy_procedure_valid(PLyProcedure *proc, HeapTuple procTup) return false; /* If the pg_proc tuple has changed, it's not valid */ - if (!(proc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + if (!(proc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&proc->fn_tid, &procTup->t_self))) return false; diff --git a/src/pl/tcl/pltcl.c b/src/pl/tcl/pltcl.c index 0dd6d8ab2c2..ac2354a0cb1 100644 --- a/src/pl/tcl/pltcl.c +++ b/src/pl/tcl/pltcl.c @@ -1429,7 +1429,7 @@ compile_pltcl_function(Oid fn_oid, Oid tgreloid, * function's pg_proc entry without changing its OID. ************************************************************/ if (prodesc != NULL && - prodesc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + prodesc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self)) { /* It's still up-to-date, so we can use it */ @@ -1496,7 +1496,7 @@ compile_pltcl_function(Oid fn_oid, Oid tgreloid, prodesc->internal_proname = pstrdup(internal_proname); prodesc->fn_cxt = proc_cxt; prodesc->fn_refcount = 0; - prodesc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + prodesc->fn_xmin = HeapTupleGetRawXmin(procTup); prodesc->fn_tid = procTup->t_self; prodesc->nargs = procStruct->pronargs; prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo)); diff --git a/src/test/Makefile b/src/test/Makefile index 69ef074d75e..53ddfffb94c 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -12,7 +12,8 @@ subdir = src/test top_builddir = ../.. 
include $(top_builddir)/src/Makefile.global -SUBDIRS = perl regress isolation modules authentication recovery subscription +SUBDIRS = perl regress isolation modules authentication recovery subscription \ + xid-64 ifeq ($(with_icu),yes) SUBDIRS += icu diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index c8c7bc5045a..2da5c8dc02a 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -476,7 +476,9 @@ sub init mkdir $self->archive_dir; PostgreSQL::Test::Utils::system_or_bail('initdb', '-D', $pgdata, '-A', - 'trust', '-N', @{ $params{extra} }); + 'trust', '-N', + '-x', '1249835483136', '-m', '2422361554944', '-o', '3594887626752', + @{ $params{extra} }); PostgreSQL::Test::Utils::system_or_bail($ENV{PG_REGRESS}, '--config-auth', $pgdata, @{ $params{auth_extra} }); diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl index e8e1a420bc1..8329d2ff7ee 100644 --- a/src/test/recovery/t/003_recovery_targets.pl +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -57,7 +57,7 @@ $node_primary->init(has_archiving => 1, allows_streaming => 1); # Bump the transaction ID epoch. This is useful to stress the portability # of recovery_target_xid parsing. 
-system_or_bail('pg_resetwal', '--epoch', '1', $node_primary->data_dir); +system_or_bail('pg_resetwal', $node_primary->data_dir); # Start it $node_primary->start; diff --git a/src/test/regress/expected/indirect_toast.out b/src/test/regress/expected/indirect_toast.out index 44b54dc37fd..e9235af1edf 100644 --- a/src/test/regress/expected/indirect_toast.out +++ b/src/test/regress/expected/indirect_toast.out @@ -161,6 +161,14 @@ SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest; ("one-toasted,one-null, via indirect",0,1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890 (5 rows) +create or replace function random_string(len integer) returns text as $$ +select substr((select string_agg(r,'') from (select random()::text as r from generate_series(1,(len+15)/16)) s1), 1, len); +$$ language sql; +create table toasttest_main(t text); +alter table toasttest_main alter column t set storage main; +insert into toasttest_main (select random_string(len) from generate_series(8000,9000) len); DROP TABLE indtoasttest; +DROP TABLE toasttest_main; DROP FUNCTION update_using_indirect(); +DROP FUNCTION random_string(integer); RESET default_toast_compression; diff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out index dd4354fc7d8..d52545b443d 100644 --- a/src/test/regress/expected/insert.out +++ b/src/test/regress/expected/insert.out @@ -100,7 +100,7 @@ SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main')); INSERT INTO large_tuple_test (select 3, NULL); -- now this tuple won't fit on the second page, but the insert should -- still succeed by extending the relation -INSERT INTO large_tuple_test (select 4, repeat('a', 8126)); +INSERT INTO large_tuple_test (select 4, repeat('a', 8112)); DROP TABLE large_tuple_test; -- -- check indirection (field/array assignment), cf bug #14265 @@ -980,3 +980,17 @@ insert into 
returningwrtest values (2, 'foo') returning returningwrtest; (1 row) drop table returningwrtest; +-- Check for MaxHeapTupleSize +create table maxheaptuplesize_test(value text); +alter table maxheaptuplesize_test alter column value set storage external; +insert into maxheaptuplesize_test values (repeat('x', 8104)); +insert into maxheaptuplesize_test values (repeat('x', 8112)); +insert into maxheaptuplesize_test values (repeat('x', 8120)); +insert into maxheaptuplesize_test values (repeat('x', 8128)); +insert into maxheaptuplesize_test values (repeat('x', 8136)); +insert into maxheaptuplesize_test values (repeat('x', 8144)); +insert into maxheaptuplesize_test values (repeat('x', 8152)); +insert into maxheaptuplesize_test values (repeat('x', 8160)); +insert into maxheaptuplesize_test values (repeat('x', 8168)); +insert into maxheaptuplesize_test values (repeat('x', 8176)); +drop table maxheaptuplesize_test; diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 86d755aa443..317d0f1b8bc 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -197,7 +197,7 @@ WHERE p1.oid != p2.oid AND ORDER BY 1, 2; proargtypes | proargtypes -----------------------------+-------------------------- - integer | xid + bigint | xid timestamp without time zone | timestamp with time zone bit | bit varying txid_snapshot | pg_snapshot @@ -705,7 +705,7 @@ int8(oid) tideq(tid,tid) timestamptz_cmp(timestamp with time zone,timestamp with time zone) interval_cmp(interval,interval) -xideqint4(xid,integer) +xideqint8(xid,bigint) timetz_eq(time with time zone,time with time zone) timetz_ne(time with time zone,time with time zone) timetz_lt(time with time zone,time with time zone) @@ -819,7 +819,7 @@ pg_lsn_gt(pg_lsn,pg_lsn) pg_lsn_ne(pg_lsn,pg_lsn) pg_lsn_cmp(pg_lsn,pg_lsn) xidneq(xid,xid) -xidneqint4(xid,integer) +xidneqint8(xid,bigint) sha224(bytea) sha256(bytea) sha384(bytea) diff --git 
a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out index 1aeed8452bd..d3be84754c1 100644 --- a/src/test/regress/expected/select_views.out +++ b/src/test/regress/expected/select_views.out @@ -2,9 +2,22 @@ -- SELECT_VIEWS -- test the views defined in CREATE_VIEWS -- -SELECT * FROM street; +SELECT * FROM street ORDER BY name COLLATE "C", thepath::text COLLATE "C"; name | thepath | cname ------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------- + 100th Ave | [(-122.1657,37.429),(-122.1647,37.432)] | Oakland + 107th Ave | [(-122.1555,37.403),(-122.1531,37.41)] | Oakland + 14th St | [(-122.299,37.147),(-122.3,37.148)] | Lafayette + 19th Ave | [(-122.2366,37.897),(-122.2359,37.905)] | Berkeley + 1st St | [(-121.75508,37.89294),(-121.753581,37.90031)] | Oakland + 5th St | [(-122.278,37),(-122.2792,37.005),(-122.2803,37.009)] | Lafayette + 5th St | [(-122.296,37.615),(-122.2953,37.598)] | Berkeley + 82nd Ave | [(-122.1695,37.596),(-122.1681,37.603)] | Berkeley + 85th Ave | [(-122.1877,37.466),(-122.186,37.476)] | Oakland + 89th Ave | [(-122.1822,37.459),(-122.1803,37.471)] | Oakland + 98th Ave | [(-122.1568,37.498),(-122.1558,37.502)] | Oakland + 98th Ave | [(-122.1693,37.438),(-122.1682,37.444)] | Oakland + 98th Ave | [(-122.2001,37.258),(-122.1974,37.27)] | Lafayette Access Rd 25 | [(-121.9283,37.894),(-121.9283,37.9)] | Oakland Ada St | [(-122.2487,37.398),(-122.2496,37.401)] | Lafayette Agua Fria Creek | [(-121.9254,37.922),(-121.9281,37.889)] | Oakland @@ -22,8 +35,8 @@ SELECT * FROM street; Arroyo Las Positas | 
[(-121.7973,37.997),(-121.7957,37.005)] | Oakland Arroyo Seco | [(-121.7073,37.766),(-121.6997,37.729)] | Oakland Ash St | [(-122.0408,37.31),(-122.04,37.292)] | Oakland - Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Oakland Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Berkeley + Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Oakland Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] | Oakland Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] | Berkeley Avenue D | [(-122.298,37.848),(-122.3024,37.849)] | Berkeley @@ -37,14 +50,14 @@ SELECT * FROM street; Broadmore Ave | [(-122.095,37.522),(-122.0936,37.497)] | Oakland Broadway | [(-122.2409,37.586),(-122.2395,37.601)] | Berkeley Buckingham Blvd | [(-122.2231,37.59),(-122.2214,37.606)] | Berkeley + Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Berkeley Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Oakland Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Oakland - Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Berkeley C St | [(-122.1768,37.46),(-122.1749,37.435)] | Oakland Calaveras Creek | [(-121.8203,37.035),(-121.8207,37.931)] | Oakland Calaveras Creek | [(-121.8203,37.035),(-121.8207,37.931)] | Oakland - California St | [(-122.2032,37.005),(-122.2016,37.996)] | Berkeley California St | [(-122.2032,37.005),(-122.2016,37.996)] | Lafayette + California St | [(-122.2032,37.005),(-122.2016,37.996)] | Berkeley Cameron Ave | [(-122.1316,37.502),(-122.1327,37.481)] | Oakland Campus Dr | [(-122.1704,37.905),(-122.1678,37.868),(-122.1671,37.865)] | Berkeley Capricorn Ave | [(-122.2176,37.404),(-122.2164,37.384)] | Lafayette @@ -55,8 +68,8 @@ SELECT * FROM street; Central Ave | [(-122.2343,37.602),(-122.2331,37.595)] | Berkeley Chambers Dr | [(-122.2004,37.352),(-122.1972,37.368)] | Lafayette Chambers Lane | [(-122.2001,37.359),(-122.1975,37.371)] | Lafayette - Champion St | [(-122.214,37.991),(-122.2147,37.002)] | Berkeley Champion 
St | [(-122.214,37.991),(-122.2147,37.002)] | Lafayette + Champion St | [(-122.214,37.991),(-122.2147,37.002)] | Berkeley Chapman Dr | [(-122.0421,37.504),(-122.0414,37.498)] | Oakland Charles St | [(-122.0255,37.505),(-122.0252,37.499)] | Oakland Cherry St | [(-122.0437,37.42),(-122.0434,37.413)] | Oakland @@ -77,9 +90,9 @@ SELECT * FROM street; Cull Canyon Road | [(-122.0536,37.435),(-122.0499,37.315)] | Oakland Cull Creek | [(-122.0624,37.875),(-122.0582,37.527)] | Berkeley D St | [(-122.1811,37.505),(-122.1805,37.497)] | Oakland + Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Berkeley Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Oakland Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Oakland - Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Berkeley Deering St | [(-122.2146,37.904),(-122.2126,37.897)] | Berkeley Dimond Ave | [(-122.2167,37.994),(-122.2162,37.006)] | Berkeley Dimond Ave | [(-122.2167,37.994),(-122.2162,37.006)] | Lafayette @@ -117,12 +130,12 @@ SELECT * FROM street; I- 580 | [(-121.9322,37.989),(-121.9243,37.006),(-121.9217,37.014)] | Oakland I- 580 | [(-122.018,37.019),(-122.0009,37.032),(-121.9787,37.983),(-121.958,37.984),(-121.9571,37.986)] | Oakland I- 580 | [(-122.018,37.019),(-122.0009,37.032),(-121.9787,37.983),(-121.958,37.984),(-121.9571,37.986)] | Oakland - I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Oakland I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Berkeley + I- 
580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Oakland I- 580 | [(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Oakland I- 580 | [(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Berkeley - I- 580 | [(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Berkeley I- 580 | [(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Lafayette + I- 580 | [(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Berkeley I- 580 Ramp | 
[(-121.8521,37.011),(-121.8479,37.999),(-121.8476,37.999),(-121.8456,37.01),(-121.8455,37.011)] | Oakland I- 580 Ramp | [(-121.8521,37.011),(-121.8479,37.999),(-121.8476,37.999),(-121.8456,37.01),(-121.8455,37.011)] | Oakland I- 580 Ramp | [(-121.8743,37.014),(-121.8722,37.999),(-121.8714,37.999)] | Oakland @@ -136,8 +149,8 @@ SELECT * FROM street; I- 580 Ramp | [(-122.0941,37.897),(-122.0943,37.902)] | Berkeley I- 580 Ramp | [(-122.096,37.888),(-122.0962,37.891),(-122.0964,37.9)] | Berkeley I- 580 Ramp | [(-122.101,37.898),(-122.1005,37.902),(-122.0989,37.911)] | Berkeley - I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Oakland I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Berkeley + I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Oakland I- 580 Ramp | [(-122.1414,37.383),(-122.1407,37.376),(-122.1403,37.372),(-122.139,37.356)] | Oakland I- 580/I-680 Ramp | ((-121.9207,37.988),(-121.9192,37.016)) | Oakland I- 580/I-680 Ramp | ((-121.9207,37.988),(-121.9192,37.016)) | Oakland @@ -158,16 +171,16 @@ SELECT * FROM street; I- 880 | ((-121.9669,37.075),(-121.9663,37.071),(-121.9656,37.065),(-121.9618,37.037),(-121.95689,37),(-121.948,37.933)) | Oakland I- 880 | [(-121.948,37.933),(-121.9471,37.925),(-121.9467,37.923),(-121.946,37.918),(-121.9452,37.912),(-121.937,37.852)] | Oakland I- 880 | [(-122.0219,37.466),(-122.0205,37.447),(-122.020331,37.44447),(-122.020008,37.43962),(-122.0195,37.432),(-122.0193,37.429),(-122.0164,37.393),(-122.010219,37.34771),(-122.0041,37.313)] | Oakland - I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Oakland I- 880 | 
[(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Berkeley - I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland + I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Oakland I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Berkeley + I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland I- 880 | [(-122.0831,37.312),(-122.0819,37.296),(-122.081,37.285),(-122.0786,37.248),(-122.078,37.24),(-122.077642,37.23496),(-122.076983,37.22567),(-122.076599,37.22026),(-122.076229,37.21505),(-122.0758,37.209)] | Oakland I- 880 | [(-122.0978,37.528),(-122.096,37.496),(-122.0931,37.453),(-122.09277,37.4496),(-122.090189,37.41442),(-122.0896,37.405),(-122.085,37.34)] | Oakland I- 880 | 
[(-122.1365,37.902),(-122.1358,37.898),(-122.1333,37.881),(-122.1323,37.874),(-122.1311,37.866),(-122.1308,37.865),(-122.1307,37.864),(-122.1289,37.851),(-122.1277,37.843),(-122.1264,37.834),(-122.1231,37.812),(-122.1165,37.766),(-122.1104,37.72),(-122.109695,37.71094),(-122.109,37.702),(-122.108312,37.69168),(-122.1076,37.681)] | Berkeley - I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Oakland I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Berkeley + I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Oakland I- 880 | [(-122.2214,37.711),(-122.2202,37.699),(-122.2199,37.695),(-122.219,37.682),(-122.2184,37.672),(-122.2173,37.652),(-122.2159,37.638),(-122.2144,37.616),(-122.2138,37.612),(-122.2135,37.609),(-122.212,37.592),(-122.2116,37.586),(-122.2111,37.581)] | Berkeley I- 880 | 
[(-122.2707,37.975),(-122.2693,37.972),(-122.2681,37.966),(-122.267,37.962),(-122.2659,37.957),(-122.2648,37.952),(-122.2636,37.946),(-122.2625,37.935),(-122.2617,37.927),(-122.2607,37.921),(-122.2593,37.916),(-122.258,37.911),(-122.2536,37.898),(-122.2432,37.858),(-122.2408,37.845),(-122.2386,37.827),(-122.2374,37.811)] | Berkeley I- 880 Ramp | [(-122.0019,37.301),(-122.002,37.293)] | Oakland @@ -202,28 +215,28 @@ SELECT * FROM street; Laguna Ave | [(-122.2099,37.989),(-122.2089,37)] | Berkeley Laguna Ave | [(-122.2099,37.989),(-122.2089,37)] | Lafayette Lakehurst Cir | [(-122.284729,37.89025),(-122.286096,37.90364)] | Berkeley - Lakeshore Ave | [(-122.2586,37.99),(-122.2556,37.006)] | Berkeley Lakeshore Ave | [(-122.2586,37.99),(-122.2556,37.006)] | Lafayette + Lakeshore Ave | [(-122.2586,37.99),(-122.2556,37.006)] | Berkeley Las Positas Road | [(-121.764488,37.99199),(-121.75569,37.02022)] | Oakland Las Positas Road | [(-121.764488,37.99199),(-121.75569,37.02022)] | Oakland - Linden St | [(-122.2867,37.998),(-122.2864,37.008)] | Berkeley Linden St | [(-122.2867,37.998),(-122.2864,37.008)] | Lafayette + Linden St | [(-122.2867,37.998),(-122.2864,37.008)] | Berkeley Livermore Ave | [(-121.7687,37.448),(-121.769,37.375)] | Oakland Livermore Ave | [(-121.7687,37.448),(-121.769,37.375)] | Oakland Livermore Ave | [(-121.772719,37.99085),(-121.7728,37.001)] | Oakland Livermore Ave | [(-121.772719,37.99085),(-121.7728,37.001)] | Oakland - Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Oakland Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Berkeley + Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Oakland Logan Ct | [(-122.0053,37.492),(-122.0061,37.484)] | Oakland Magnolia St | [(-122.0971,37.5),(-122.0962,37.484)] | Oakland Mandalay Road | [(-122.2322,37.397),(-122.2321,37.403)] | Lafayette Marin Ave | [(-122.2741,37.894),(-122.272,37.901)] | Berkeley Martin Luther King Jr Way | [(-122.2712,37.608),(-122.2711,37.599)] | Berkeley Mattos Dr | 
[(-122.0005,37.502),(-122.000898,37.49683)] | Oakland - Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Oakland Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Berkeley - McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Oakland + Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Oakland McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Berkeley + McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Oakland Medlar Dr | [(-122.0627,37.378),(-122.0625,37.375)] | Oakland Mildred Ct | [(-122.0002,37.388),(-121.9998,37.386)] | Oakland Miller Road | [(-122.0902,37.645),(-122.0865,37.545)] | Berkeley @@ -242,8 +255,8 @@ SELECT * FROM street; Parkridge Dr | [(-122.1438,37.884),(-122.1428,37.9)] | Berkeley Parkside Dr | [(-122.0475,37.603),(-122.0443,37.596)] | Berkeley Paseo Padre Pkwy | [(-121.9143,37.005),(-121.913522,37)] | Oakland - Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Oakland Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Berkeley + Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Oakland Pearl St | [(-122.2383,37.594),(-122.2366,37.615)] | Berkeley Periwinkle Road | [(-122.0451,37.301),(-122.044758,37.29844)] | Oakland Pimlico Dr | [(-121.8616,37.998),(-121.8618,37.008)] | Oakland @@ -254,11 +267,11 @@ SELECT * FROM street; Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Oakland Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Berkeley Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland - Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Berkeley + Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland Redding St | [(-122.1978,37.901),(-122.1975,37.895)] | Berkeley - Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Oakland Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Berkeley + Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Oakland 
Roca Dr | [(-122.0335,37.609),(-122.0314,37.599)] | Berkeley Rosedale Ct | [(-121.9232,37.9),(-121.924,37.897)] | Oakland Sacramento St | [(-122.2799,37.606),(-122.2797,37.597)] | Berkeley @@ -266,8 +279,8 @@ SELECT * FROM street; Saginaw Ct | [(-121.8803,37.898),(-121.8806,37.901)] | Oakland San Andreas Dr | [(-122.0609,37.9),(-122.0614,37.895)] | Berkeley Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland - Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Berkeley + Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland Shattuck Ave | [(-122.2686,37.904),(-122.2686,37.897)] | Berkeley Sheridan Road | [(-122.2279,37.425),(-122.2253,37.411),(-122.2223,37.377)] | Lafayette Shoreline Dr | [(-122.2657,37.603),(-122.2648,37.6)] | Berkeley @@ -317,27 +330,14 @@ SELECT * FROM street; Welch Creek Road | [(-121.7695,37.386),(-121.7737,37.413)] | Oakland West Loop Road | [(-122.0576,37.604),(-122.0602,37.586)] | Berkeley Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland - Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Berkeley + Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland Whitlock Creek | [(-121.74683,37.91276),(-121.733107,37)] | Oakland Whitlock Creek | [(-121.74683,37.91276),(-121.733107,37)] | Oakland Willimet Way | [(-122.0964,37.517),(-122.0949,37.493)] | Oakland Wisconsin St | [(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Oakland Wisconsin St | [(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Berkeley Wp Railroad | [(-122.254,37.902),(-122.2506,37.891)] | Berkeley - 100th Ave | [(-122.1657,37.429),(-122.1647,37.432)] | Oakland - 107th Ave | [(-122.1555,37.403),(-122.1531,37.41)] | Oakland - 14th St | [(-122.299,37.147),(-122.3,37.148)] | Lafayette - 19th Ave | 
[(-122.2366,37.897),(-122.2359,37.905)] | Berkeley - 1st St | [(-121.75508,37.89294),(-121.753581,37.90031)] | Oakland - 5th St | [(-122.278,37),(-122.2792,37.005),(-122.2803,37.009)] | Lafayette - 5th St | [(-122.296,37.615),(-122.2953,37.598)] | Berkeley - 82nd Ave | [(-122.1695,37.596),(-122.1681,37.603)] | Berkeley - 85th Ave | [(-122.1877,37.466),(-122.186,37.476)] | Oakland - 89th Ave | [(-122.1822,37.459),(-122.1803,37.471)] | Oakland - 98th Ave | [(-122.1568,37.498),(-122.1558,37.502)] | Oakland - 98th Ave | [(-122.1693,37.438),(-122.1682,37.444)] | Oakland - 98th Ave | [(-122.2001,37.258),(-122.1974,37.27)] | Lafayette (333 rows) SELECT name, #thepath FROM iexit ORDER BY name COLLATE "C", 2; diff --git a/src/test/regress/expected/txid.out b/src/test/regress/expected/txid.out index 95ba66e95ee..2ea4434f513 100644 --- a/src/test/regress/expected/txid.out +++ b/src/test/regress/expected/txid.out @@ -238,9 +238,11 @@ SELECT txid_snapshot '1:9223372036854775807:3'; (1 row) SELECT txid_snapshot '1:9223372036854775808:3'; -ERROR: invalid input syntax for type pg_snapshot: "1:9223372036854775808:3" -LINE 1: SELECT txid_snapshot '1:9223372036854775808:3'; - ^ + txid_snapshot +------------------------- + 1:9223372036854775808:3 +(1 row) + -- test txid_current_if_assigned BEGIN; SELECT txid_current_if_assigned() IS NULL; diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index d3ac08c9ee3..952019b2e27 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -19,7 +19,7 @@ WHERE t1.typnamespace = 0 OR (t1.typlen <= 0 AND t1.typlen != -1 AND t1.typlen != -2) OR (t1.typtype not in ('b', 'c', 'd', 'e', 'm', 'p', 'r')) OR NOT t1.typisdefined OR - (t1.typalign not in ('c', 's', 'i', 'd')) OR + (t1.typalign not in ('c', 's', 'i', 'd', 'x')) OR (t1.typstorage not in ('p', 'x', 'e', 'm')); oid | typname -----+--------- @@ -32,7 +32,8 @@ WHERE t1.typbyval AND (t1.typlen != 1 OR 
t1.typalign != 'c') AND (t1.typlen != 2 OR t1.typalign != 's') AND (t1.typlen != 4 OR t1.typalign != 'i') AND - (t1.typlen != 8 OR t1.typalign != 'd'); + (t1.typlen != 8 OR t1.typalign != 'd') AND + (t1.typlen != 8 OR t1.typalign != 'x'); oid | typname -----+--------- (0 rows) diff --git a/src/test/regress/expected/xid.out b/src/test/regress/expected/xid.out index d8e76f3321f..3252bdc28d9 100644 --- a/src/test/regress/expected/xid.out +++ b/src/test/regress/expected/xid.out @@ -8,9 +8,9 @@ select '010'::xid, '42'::xid8, '0xffffffffffffffff'::xid8, '-1'::xid8; - xid | xid | xid | xid | xid8 | xid8 | xid8 | xid8 ------+-----+------------+------------+------+------+----------------------+---------------------- - 8 | 42 | 4294967295 | 4294967295 | 8 | 42 | 18446744073709551615 | 18446744073709551615 + xid | xid | xid | xid | xid8 | xid8 | xid8 | xid8 +-----+-----+------------+----------------------+------+------+----------------------+---------------------- + 8 | 42 | 4294967295 | 18446744073709551615 | 8 | 42 | 18446744073709551615 | 18446744073709551615 (1 row) -- garbage values are not yet rejected (perhaps they should be) @@ -381,9 +381,11 @@ SELECT pg_snapshot '1:9223372036854775807:3'; (1 row) SELECT pg_snapshot '1:9223372036854775808:3'; -ERROR: invalid input syntax for type pg_snapshot: "1:9223372036854775808:3" -LINE 1: SELECT pg_snapshot '1:9223372036854775808:3'; - ^ + pg_snapshot +------------------------- + 1:9223372036854775808:3 +(1 row) + -- test pg_current_xact_id_if_assigned BEGIN; SELECT pg_current_xact_id_if_assigned() IS NULL; diff --git a/src/test/regress/expected/xid64.out b/src/test/regress/expected/xid64.out new file mode 100644 index 00000000000..9bd760aebc9 --- /dev/null +++ b/src/test/regress/expected/xid64.out @@ -0,0 +1,122 @@ +--- +--- Unit test for xid64 functions +--- +-- directory paths and dlsuffix are passed to us in environment variables +\getenv libdir PG_LIBDIR +\getenv dlsuffix PG_DLSUFFIX +\set regresslib :libdir '/regress' 
:dlsuffix +CREATE FUNCTION xid64_test_1(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_1' LANGUAGE C STRICT; +CREATE FUNCTION xid64_test_2(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_2' LANGUAGE C STRICT; +CREATE FUNCTION xid64_test_3(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_3' LANGUAGE C STRICT; +CREATE FUNCTION xid64_test_double_xmax(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_double_xmax' LANGUAGE C STRICT; +--- +--- Check page consistency after conversion (on empty page) +--- +CREATE TABLE test_xid64_table(a int); +ALTER TABLE test_xid64_table SET (autovacuum_enabled = false); +INSERT INTO test_xid64_table(a) SELECT a FROM generate_series(1, 1000) AS a; +SELECT xid64_test_1('test_xid64_table'); +INFO: test 1: begin +INFO: test 1: page is converted to xid64 format +INFO: test 1: done + xid64_test_1 +-------------- + +(1 row) + +DROP TABLE test_xid64_table; +--- +--- Check page consistency after conversion (on actual page) +--- +CREATE TABLE test_xid64_table(a int); +ALTER TABLE test_xid64_table SET (autovacuum_enabled = false); +INSERT INTO test_xid64_table(a) SELECT a FROM generate_series(1, 1000) AS a; +SELECT xid64_test_2('test_xid64_table'); +INFO: test 2: begin +INFO: test 2: page is converted to xid64 format +INFO: test 2: done + xid64_test_2 +-------------- + +(1 row) + +DROP TABLE test_xid64_table; +--- +--- Check tuples consistency after conversion +--- +CREATE TABLE test_xid64_table(s serial, i int, t text); +ALTER TABLE test_xid64_table SET (autovacuum_enabled = false); +DO $$ +BEGIN + FOR j IN 1..20 LOOP + INSERT INTO test_xid64_table(i, t) VALUES (random()::int, md5(random()::text)); + COMMIT; + END LOOP; +END $$; +DO $$ +BEGIN + FOR j IN 1..10 LOOP + DELETE FROM test_xid64_table WHERE ctid IN (SELECT ctid FROM test_xid64_table TABLESAMPLE BERNOULLI (5)); + COMMIT; + END LOOP; +END $$; +SELECT xid64_test_3('test_xid64_table'); +INFO: test 3: begin +INFO: test 3: 1 pages are tested +INFO: 
test 3: done + xid64_test_3 +-------------- + +(1 row) + +DROP TABLE test_xid64_table; +--- +--- Check tuples consistency after conversion to double xmax (on full page) +--- +CREATE TABLE test_xid64_table(i int); +DO $$ +BEGIN + FOR j IN 1..40 LOOP + INSERT INTO test_xid64_table SELECT i FROM generate_series(1, 100) AS i; + COMMIT; + END LOOP; +END $$; +SELECT xid64_test_3('test_xid64_table'); +INFO: test 3: begin +INFO: test 3: 18 pages are tested +INFO: test 3: done + xid64_test_3 +-------------- + +(1 row) + +DROP TABLE test_xid64_table; +CREATE TABLE test_xid64_table(i text); +INSERT INTO test_xid64_table(i) VALUES ('NNBABCDSDFGHJKLP'); +CREATE TABLE test_xid64_table(i int); +ERROR: relation "test_xid64_table" already exists +DO $$ +BEGIN + FOR j IN 1..40 LOOP + INSERT INTO test_xid64_table(i) SELECT 'A' FROM generate_series(1, 100) AS i; + COMMIT; + END LOOP; +END $$; +SELECT xid64_test_double_xmax('test_xid64_table'); +INFO: test double xmax: page 0 is converted into double xmax format +INFO: test double xmax: 18 pages are tested +INFO: test double xmax: end + xid64_test_double_xmax +------------------------ + +(1 row) + +DROP TABLE test_xid64_table; +DROP FUNCTION xid64_test_1(rel regclass); +DROP FUNCTION xid64_test_2(rel regclass); +DROP FUNCTION xid64_test_3(rel regclass); +DROP FUNCTION xid64_test_double_xmax(rel regclass); diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 103e11483d2..456cd974405 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -33,7 +33,7 @@ test: strings numerology point lseg line box path polygon circle date time timet # geometry depends on point, lseg, line, box, path, polygon, circle # horology depends on date, time, timetz, timestamp, timestamptz, interval # ---------- -test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc +test: geometry horology tstypes regex type_sanity opr_sanity 
misc_sanity comments expressions unicode xid xid64 mvcc # ---------- # Load huge amounts of data diff --git a/src/test/regress/pg_regress.c b/src/test/regress/pg_regress.c index 982801e029d..17cc8ea066e 100644 --- a/src/test/regress/pg_regress.c +++ b/src/test/regress/pg_regress.c @@ -2267,7 +2267,7 @@ regression_main(int argc, char *argv[], /* initdb */ header(_("initializing database system")); snprintf(buf, sizeof(buf), - "\"%s%sinitdb\" -D \"%s/data\" --no-clean --no-sync%s%s > \"%s/log/initdb.log\" 2>&1", + "\"%s%sinitdb\" -D \"%s/data\" -x 1249835483136 -m 2422361554944 -o 3594887626752 --no-clean --no-sync%s%s > \"%s/log/initdb.log\" 2>&1", bindir ? bindir : "", bindir ? "/" : "", temp_instance, diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index ba3532a51e8..60b37347a78 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -1257,3 +1257,359 @@ get_columns_length(PG_FUNCTION_ARGS) PG_RETURN_INT32(column_offset); } + +#include "access/hio.h" +#include "access/relation.h" +#include "storage/bufmgr.h" +#include "utils/rel.h" + +static void +CheckNewPage(char *msg, Page page) +{ + uint16 size; + + if (PageGetPageLayoutVersion(page) != PG_PAGE_LAYOUT_VERSION) + elog(ERROR, "%s: page version is %d, expected %d ", + msg, PageGetPageLayoutVersion(page), PG_PAGE_LAYOUT_VERSION); + + size = PageGetSpecialSize(page); + if (size == MAXALIGN(sizeof(HeapPageSpecialData))) + elog(INFO, "%s: page is converted to xid64 format", msg); + else if (HeapPageIsDoubleXmax(page)) + elog(INFO, "%s: page is converted into double xmax format", msg); + else + elog(ERROR, "%s: converted page has pageSpecial size %u, expected %llu", + msg, size, + (unsigned long long) MAXALIGN(sizeof(HeapPageSpecialData))); +} + +/* + * Construct empty page in 32-bit xid format. + * Convert it to 64-bit xid format. + * Run basic checks. 
+ */ +PG_FUNCTION_INFO_V1(xid64_test_1); +Datum +xid64_test_1(PG_FUNCTION_ARGS) +{ + Oid relid; + Relation rel; + Buffer buf; + char data[BLCKSZ]; + Page page; + + elog(INFO, "test 1: begin"); + + relid = PG_GETARG_OID(0); + rel = relation_open(relid, AccessExclusiveLock); + + buf = ReadBuffer(rel, 0); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + + page = data; + PageInit(page, BLCKSZ, 0); + PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1); + + if (PageGetSpecialSize(page) > 0) + elog(ERROR, "old page special not expected"); + + convert_page(rel, page, buf, 0); + CheckNewPage("test 1", page); + + UnlockReleaseBuffer(buf); + relation_close(rel, AccessExclusiveLock); + elog(INFO, "test 1: done"); + + PG_RETURN_VOID(); +} + +/* + * Get page from relation. + * Make this page look like in 32-bit xid format. + * Convert it to 64-bit xid format. + * Run basic checks. + */ +PG_FUNCTION_INFO_V1(xid64_test_2); +Datum +xid64_test_2(PG_FUNCTION_ARGS) +{ + Oid relid; + Relation rel; + Buffer buf; + Page page; + PageHeader hdr; + + elog(INFO, "test 2: begin"); + + relid = PG_GETARG_OID(0); + rel = relation_open(relid, AccessExclusiveLock); + + buf = ReadBuffer(rel, 0); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + + page = BufferGetPage(buf); + hdr = (PageHeader) page; + + if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HeapPageSpecialData))) + elog(ERROR, "page expected in new format"); + + hdr->pd_special = BLCKSZ; + PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1); + + convert_page(rel, page, buf, 0); + CheckNewPage("test 2", page); + + UnlockReleaseBuffer(buf); + relation_close(rel, AccessExclusiveLock); + elog(INFO, "test 2: done"); + + PG_RETURN_VOID(); +} + +typedef struct TupleCheckValues +{ + TransactionId xmin; + TransactionId xmax; +} TupleCheckValues; + +typedef struct RelCheckValues +{ + TupleCheckValues *tcv; + Size ntuples; +} RelCheckValues; + +static RelCheckValues +FillRelCheckValues(Relation rel) +{ + RelCheckValues set; + 
BlockNumber pageno, + npages; + Size n; + + npages = RelationGetNumberOfBlocks(rel); + if (npages == 0) + elog(ERROR, "relation \"%s\" is empty", NameStr(rel->rd_rel->relname)); + +#define DEFAULT_SET_SIZE 64 + n = DEFAULT_SET_SIZE; + set.ntuples = 0; + set.tcv = palloc(sizeof(set.tcv[0]) * n); + + for (pageno = 0; pageno != npages; ++pageno) + { + Buffer buf; + Page page; + OffsetNumber maxoff, + offnum; + HeapTupleHeader tuphdr; + ItemId itemid; + HeapTupleData tuple; + TransactionId xmin, + xmax; + + buf = ReadBuffer(rel, pageno); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + page = BufferGetPage(buf); + maxoff = PageGetMaxOffsetNumber(page); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + tuphdr = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_data = tuphdr; + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + + if (HeapPageGetSpecial(page) == doubleXmaxSpecial) + { + xmin = tuphdr->t_choice.t_heap.t_xmin; + xmax = tuphdr->t_choice.t_heap.t_xmax; + } + else + { + HeapTupleCopyBaseFromPage(&tuple, page); + + xmin = HeapTupleGetRawXmin(&tuple); + xmax = HeapTupleGetRawXmax(&tuple); + } + + if (set.ntuples == n) + { + n *= 2; + set.tcv = repalloc(set.tcv, sizeof(set.tcv[0]) * n); + } + + set.tcv[set.ntuples].xmin = xmin; + set.tcv[set.ntuples].xmax = xmax; + set.ntuples++; + } + + UnlockReleaseBuffer(buf); + } + + return set; +} + +/* + * Test xmin/xmax invariant when converting page from 32bit xid to 64xid. + * + * Scenario: + * - enforce all relation pages to 32bit xid format, discarding pd_xid_base and + * pd_multi_base + * - store all xmin/xmax in array + * - convert all the pages from relation into 64xid format + * - store all new xmin/xmax in array + * - compare old and new xmin/xmax + * + * NOTE: inital xid value does not affect test as pd_xid_base/pd_multi_base + * discarded. 
+ */ +PG_FUNCTION_INFO_V1(xid64_test_3); +Datum +xid64_test_3(PG_FUNCTION_ARGS) +{ + Oid relid; + Relation rel; + RelCheckValues before, + after; + BlockNumber pageno, + npages; + Size i; + + elog(INFO, "test 3: begin"); + + relid = PG_GETARG_OID(0); + rel = relation_open(relid, AccessExclusiveLock); + npages = RelationGetNumberOfBlocks(rel); + + for (pageno = 0; pageno != npages; ++pageno) + { + Buffer buf; + Page page; + PageHeader hdr; + + buf = ReadBuffer(rel, pageno); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + page = BufferGetPage(buf); + hdr = (PageHeader) page; + + hdr->pd_special = BLCKSZ; + PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1); + UnlockReleaseBuffer(buf); + } + + before = FillRelCheckValues(rel); + + for (pageno = 0; pageno != npages; ++pageno) + { + Buffer buf; + Page page; + + buf = ReadBuffer(rel, pageno); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + page = BufferGetPage(buf); + + convert_page(rel, page, buf, pageno); + UnlockReleaseBuffer(buf); + } + + after = FillRelCheckValues(rel); + + if (before.ntuples != after.ntuples) + elog(ERROR, "numer of tuples must be equal"); + + for (i = 0; i != before.ntuples; ++i) + { + if (before.tcv[i].xmin != after.tcv[i].xmin && after.tcv[i].xmin) + elog(ERROR, "old and new xmin does not match (%llu != %llu)", + (unsigned long long) before.tcv[i].xmin, + (unsigned long long) after.tcv[i].xmin); + + if (before.tcv[i].xmax != after.tcv[i].xmax) + elog(ERROR, "old and new xmax does not match (%llu != %llu)", + (unsigned long long) before.tcv[i].xmax, + (unsigned long long) after.tcv[i].xmax); + } + + elog(INFO, "test 3: %u pages are tested", npages); + + pfree(before.tcv); + pfree(after.tcv); + + relation_close(rel, AccessExclusiveLock); + elog(INFO, "test 3: done"); + + PG_RETURN_VOID(); +} + +PG_FUNCTION_INFO_V1(xid64_test_double_xmax); +Datum +xid64_test_double_xmax(PG_FUNCTION_ARGS) +{ + Oid relid; + Relation rel; + BlockNumber pageno, + npages; + bool found; + + relid = 
PG_GETARG_OID(0); + rel = relation_open(relid, AccessExclusiveLock); + npages = RelationGetNumberOfBlocks(rel); + found = false; + + for (pageno = 0; pageno != npages; ++pageno) + { + Buffer buf; + Page page; + PageHeader hdr; + ItemId itemid; + OffsetNumber offnum; + HeapTupleHeader tuphdr; + + buf = ReadBuffer(rel, pageno); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + page = BufferGetPage(buf); + hdr = (PageHeader) page; + + if (pageno == 0) + { + itemid = PageGetItemId(page, FirstOffsetNumber); + itemid->lp_len += 16; /* Move to overlap special */ + } + + for (offnum = FirstOffsetNumber; + offnum <= PageGetMaxOffsetNumber(page); + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + tuphdr = (HeapTupleHeader) PageGetItem(page, itemid); + tuphdr->t_infomask |= HEAP_XMIN_COMMITTED; + } + + hdr->pd_special = BLCKSZ; + PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1); + + convert_page(rel, page, buf, pageno); + + if (HeapPageIsDoubleXmax(page)) + { + found = true; + elog(INFO, "test double xmax: page %u is converted into double xmax format", + pageno); + } + + UnlockReleaseBuffer(buf); + } + + if (!found) + elog(ERROR, "test double xmax: failed, no double xmax"); + + elog(INFO, "test double xmax: %u pages are tested", npages); + elog(INFO, "test double xmax: end"); + + relation_close(rel, AccessExclusiveLock); + + PG_RETURN_VOID(); +} diff --git a/src/test/regress/sql/indirect_toast.sql b/src/test/regress/sql/indirect_toast.sql index 3e2f6c02375..0444867f66b 100644 --- a/src/test/regress/sql/indirect_toast.sql +++ b/src/test/regress/sql/indirect_toast.sql @@ -76,7 +76,18 @@ SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest; VACUUM FREEZE indtoasttest; SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest; +create or replace function random_string(len integer) returns text as $$ +select substr((select string_agg(r,'') from (select random()::text as r from generate_series(1,(len+15)/16)) s1), 1, len); 
+$$ language sql; + +create table toasttest_main(t text); +alter table toasttest_main alter column t set storage main; + +insert into toasttest_main (select random_string(len) from generate_series(8000,9000) len); + DROP TABLE indtoasttest; +DROP TABLE toasttest_main; DROP FUNCTION update_using_indirect(); +DROP FUNCTION random_string(integer); RESET default_toast_compression; diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql index bdcffd03146..7ada0801ebf 100644 --- a/src/test/regress/sql/insert.sql +++ b/src/test/regress/sql/insert.sql @@ -55,7 +55,7 @@ INSERT INTO large_tuple_test (select 3, NULL); -- now this tuple won't fit on the second page, but the insert should -- still succeed by extending the relation -INSERT INTO large_tuple_test (select 4, repeat('a', 8126)); +INSERT INTO large_tuple_test (select 4, repeat('a', 8112)); DROP TABLE large_tuple_test; @@ -597,3 +597,18 @@ alter table returningwrtest2 drop c; alter table returningwrtest attach partition returningwrtest2 for values in (2); insert into returningwrtest values (2, 'foo') returning returningwrtest; drop table returningwrtest; + +-- Check for MaxHeapTupleSize +create table maxheaptuplesize_test(value text); +alter table maxheaptuplesize_test alter column value set storage external; +insert into maxheaptuplesize_test values (repeat('x', 8104)); +insert into maxheaptuplesize_test values (repeat('x', 8112)); +insert into maxheaptuplesize_test values (repeat('x', 8120)); +insert into maxheaptuplesize_test values (repeat('x', 8128)); +insert into maxheaptuplesize_test values (repeat('x', 8136)); +insert into maxheaptuplesize_test values (repeat('x', 8144)); +insert into maxheaptuplesize_test values (repeat('x', 8152)); +insert into maxheaptuplesize_test values (repeat('x', 8160)); +insert into maxheaptuplesize_test values (repeat('x', 8168)); +insert into maxheaptuplesize_test values (repeat('x', 8176)); +drop table maxheaptuplesize_test; diff --git 
a/src/test/regress/sql/select_views.sql b/src/test/regress/sql/select_views.sql index e742f136990..70e663e350c 100644 --- a/src/test/regress/sql/select_views.sql +++ b/src/test/regress/sql/select_views.sql @@ -3,7 +3,7 @@ -- test the views defined in CREATE_VIEWS -- -SELECT * FROM street; +SELECT * FROM street ORDER BY name COLLATE "C", thepath::text COLLATE "C"; SELECT name, #thepath FROM iexit ORDER BY name COLLATE "C", 2; diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql index 5edc1f1f6ed..2b6c4aff6c1 100644 --- a/src/test/regress/sql/type_sanity.sql +++ b/src/test/regress/sql/type_sanity.sql @@ -22,7 +22,7 @@ WHERE t1.typnamespace = 0 OR (t1.typlen <= 0 AND t1.typlen != -1 AND t1.typlen != -2) OR (t1.typtype not in ('b', 'c', 'd', 'e', 'm', 'p', 'r')) OR NOT t1.typisdefined OR - (t1.typalign not in ('c', 's', 'i', 'd')) OR + (t1.typalign not in ('c', 's', 'i', 'd', 'x')) OR (t1.typstorage not in ('p', 'x', 'e', 'm')); -- Look for "pass by value" types that can't be passed by value. @@ -33,7 +33,8 @@ WHERE t1.typbyval AND (t1.typlen != 1 OR t1.typalign != 'c') AND (t1.typlen != 2 OR t1.typalign != 's') AND (t1.typlen != 4 OR t1.typalign != 'i') AND - (t1.typlen != 8 OR t1.typalign != 'd'); + (t1.typlen != 8 OR t1.typalign != 'd') AND + (t1.typlen != 8 OR t1.typalign != 'x'); -- Look for "toastable" types that aren't varlena. 
diff --git a/src/test/regress/sql/xid64.sql b/src/test/regress/sql/xid64.sql new file mode 100644 index 00000000000..0a29e9635ab --- /dev/null +++ b/src/test/regress/sql/xid64.sql @@ -0,0 +1,97 @@ +--- +--- Unit test for xid64 functions +--- + +-- directory paths and dlsuffix are passed to us in environment variables +\getenv libdir PG_LIBDIR +\getenv dlsuffix PG_DLSUFFIX + +\set regresslib :libdir '/regress' :dlsuffix + +CREATE FUNCTION xid64_test_1(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_1' LANGUAGE C STRICT; +CREATE FUNCTION xid64_test_2(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_2' LANGUAGE C STRICT; +CREATE FUNCTION xid64_test_3(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_3' LANGUAGE C STRICT; +CREATE FUNCTION xid64_test_double_xmax(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_double_xmax' LANGUAGE C STRICT; + +--- +--- Check page consistency after conversion (on empty page) +--- +CREATE TABLE test_xid64_table(a int); +ALTER TABLE test_xid64_table SET (autovacuum_enabled = false); +INSERT INTO test_xid64_table(a) SELECT a FROM generate_series(1, 1000) AS a; +SELECT xid64_test_1('test_xid64_table'); +DROP TABLE test_xid64_table; + +--- +--- Check page consistency after conversion (on actual page) +--- +CREATE TABLE test_xid64_table(a int); +ALTER TABLE test_xid64_table SET (autovacuum_enabled = false); +INSERT INTO test_xid64_table(a) SELECT a FROM generate_series(1, 1000) AS a; +SELECT xid64_test_2('test_xid64_table'); +DROP TABLE test_xid64_table; + +--- +--- Check tuples consistency after conversion +--- +CREATE TABLE test_xid64_table(s serial, i int, t text); +ALTER TABLE test_xid64_table SET (autovacuum_enabled = false); + +DO $$ +BEGIN + FOR j IN 1..20 LOOP + INSERT INTO test_xid64_table(i, t) VALUES (random()::int, md5(random()::text)); + COMMIT; + END LOOP; +END $$; + +DO $$ +BEGIN + FOR j IN 1..10 LOOP + DELETE FROM test_xid64_table WHERE ctid IN (SELECT ctid FROM test_xid64_table 
TABLESAMPLE BERNOULLI (5)); + COMMIT; + END LOOP; +END $$; + +SELECT xid64_test_3('test_xid64_table'); +DROP TABLE test_xid64_table; + +--- +--- Check tuples consistency after conversion to double xmax (on full page) +--- +CREATE TABLE test_xid64_table(i int); + +DO $$ +BEGIN + FOR j IN 1..40 LOOP + INSERT INTO test_xid64_table SELECT i FROM generate_series(1, 100) AS i; + COMMIT; + END LOOP; +END $$; + +SELECT xid64_test_3('test_xid64_table'); +DROP TABLE test_xid64_table; + +CREATE TABLE test_xid64_table(i text); +INSERT INTO test_xid64_table(i) VALUES ('NNBABCDSDFGHJKLP'); +CREATE TABLE test_xid64_table(i int); + +DO $$ +BEGIN + FOR j IN 1..40 LOOP + INSERT INTO test_xid64_table(i) SELECT 'A' FROM generate_series(1, 100) AS i; + COMMIT; + END LOOP; +END $$; + +SELECT xid64_test_double_xmax('test_xid64_table'); +DROP TABLE test_xid64_table; + +DROP FUNCTION xid64_test_1(rel regclass); +DROP FUNCTION xid64_test_2(rel regclass); +DROP FUNCTION xid64_test_3(rel regclass); +DROP FUNCTION xid64_test_double_xmax(rel regclass); diff --git a/src/test/xid-64/Makefile b/src/test/xid-64/Makefile new file mode 100644 index 00000000000..3b1e50dfc0d --- /dev/null +++ b/src/test/xid-64/Makefile @@ -0,0 +1,22 @@ +#------------------------------------------------------------------------- +# +# Makefile for src/test/xid-64 +# +# Copyright (c) 2018, Postgres Professional +# +# src/test/xid-64/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/test/xid-64 +top_builddir = ../../.. 
+include $(top_builddir)/src/Makefile.global + +check: + $(prove_check) + +installcheck: + $(prove_installcheck) + +clean distclean maintainer-clean: + rm -rf tmp_check diff --git a/src/test/xid-64/README b/src/test/xid-64/README new file mode 100644 index 00000000000..01c0a1a1f74 --- /dev/null +++ b/src/test/xid-64/README @@ -0,0 +1,16 @@ +src/test/xid-64/README + +Regression tests for 64-bit XIDs +============================================= + +This directory contains a test suite for 64-bit xids. + +Running the tests +================= + + make check + +NOTE: This creates a temporary installation, and some tests may +create one or multiple nodes. + +NOTE: This requires the --enable-tap-tests argument to configure. diff --git a/src/test/xid-64/t/001_test_large_xids.pl b/src/test/xid-64/t/001_test_large_xids.pl new file mode 100644 index 00000000000..4c7dbc6cb16 --- /dev/null +++ b/src/test/xid-64/t/001_test_large_xids.pl @@ -0,0 +1,54 @@ +# Tests for large xid values +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use bigint; + +sub command_output +{ + my ($cmd) = @_; + my ($stdout, $stderr); + print("# Running: " . join(" ", @{$cmd}) . 
"\n"); + my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr; + ok($result, "@$cmd exit code 0"); + is($stderr, '', "@$cmd no stderr"); + return $stdout; +} + +my $START_VAL = 2**32; +my $MAX_VAL = 2**62; + +my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imxid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL)); + +# Initialize master node with the random xid-related parameters +my $node = PostgreSQL::Test::Cluster->new('master'); +$node->init(extra => [ "--xid=$ixid", "--multixact-id=$imxid", "--multixact-offset=$imoff" ]); +$node->start; + +# Initialize master node and check the xid-related parameters +my $pgcd_output = command_output( + [ 'pg_controldata', '-D', $node->data_dir ] ); +print($pgcd_output); print('\n'); +ok($pgcd_output =~ qr/Latest checkpoint's NextXID:\s*(\d+)/, "XID found"); +my ($nextxid) = ($1); +ok($nextxid >= $ixid && $nextxid < $ixid + 1000, + "Latest checkpoint's NextXID ($nextxid) is close to the initial xid ($ixid)."); +ok($pgcd_output =~ qr/Latest checkpoint's NextMultiXactId:\s*(\d+)/, "MultiXactId found"); +my ($nextmxid) = ($1); +ok($nextmxid >= $imxid && $nextmxid < $imxid + 1000, + "Latest checkpoint's NextMultiXactId ($nextmxid) is close to the initial multiXactId ($imxid)."); +ok($pgcd_output =~ qr/Latest checkpoint's NextMultiOffset:\s*(\d+)/, "MultiOffset found"); +my ($nextmoff) = ($1); +ok($nextmoff >= $imoff && $nextmoff < $imoff + 1000, + "Latest checkpoint's NextMultiOffset ($nextmoff) is close to the initial multiOffset ($imoff)."); + +# Run pgbench to check whether the database is working properly +$node->command_ok( + [ qw(pgbench --initialize --no-vacuum --scale=10) ], + 'pgbench finished without errors'); + +done_testing(); \ No newline at end of file diff --git a/src/test/xid-64/t/002_test_gucs.pl b/src/test/xid-64/t/002_test_gucs.pl new file mode 100644 index 00000000000..ff9f2f30523 --- /dev/null +++ 
b/src/test/xid-64/t/002_test_gucs.pl @@ -0,0 +1,79 @@ +# Tests for guc boundary values +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use bigint; + +sub command_output +{ + my ($cmd) = @_; + my ($stdout, $stderr); + print("# Running: " . join(" ", @{$cmd}) . "\n"); + my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr; + ok($result, "@$cmd exit code 0"); + is($stderr, '', "@$cmd no stderr"); + return $stdout; +} + +sub set_guc +{ + my ($node, $guc, $val) = @_; + print("SET $guc = $val\n"); + $node->safe_psql('postgres', "ALTER SYSTEM SET $guc = $val"); + $node->restart(); +} + +sub test_pgbench +{ + my ($node) = @_; + $node->command_ok( + [ qw(pgbench --progress=5 --transactions=1000 --jobs=5 --client=5) ], + 'pgbench finished without errors'); +} + +my @guc_vals = ( + [ "autovacuum_freeze_max_age", 100000, 2**63 - 1 ], + [ "autovacuum_multixact_freeze_max_age", 10000, 2**63 - 1 ], + [ "vacuum_freeze_min_age", 0, 2**63 - 1 ], + [ "vacuum_freeze_table_age", 0, 2**63 - 1 ], + [ "vacuum_multixact_freeze_min_age", 0, 2**63 - 1 ], + [ "vacuum_multixact_freeze_table_age", 0, 2**63 -1 ] +); + +my $START_VAL = 2**32; +my $MAX_VAL = 2**62; + +my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imxid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL)); + +# Initialize master node +my $node = PostgreSQL::Test::Cluster->new('master'); +$node->init(extra => [ "--xid=$ixid", "--multixact-id=$imxid", "--multixact-offset=$imoff" ]); +# Disable logging of all statements to avoid log bloat during pgbench +$node->append_conf('postgresql.conf', "log_statement = none"); +$node->start; + +# Fill the test database with the pgbench data +$node->command_ok( + [ qw(pgbench --initialize --scale=10) ], + 'pgbench finished without errors'); + +# Test all GUCs with minimum, maximum and random value inbetween +# (run pgbench for every configuration setting) 
+foreach my $gi (0 .. $#guc_vals) { + print($guc_vals[$gi][0]); print("\n"); + my $guc = $guc_vals[$gi][0]; + my $minval = $guc_vals[$gi][1]; + my $maxval = $guc_vals[$gi][2]; + set_guc($node, $guc, $minval); + test_pgbench($node); + set_guc($node, $guc, $maxval); + test_pgbench($node); + set_guc($node, $guc, $minval + int(rand($maxval - $minval))); + test_pgbench($node); +} + +done_testing(); \ No newline at end of file diff --git a/src/test/xid-64/t/003_test_integrity.pl b/src/test/xid-64/t/003_test_integrity.pl new file mode 100644 index 00000000000..ca079f11cb9 --- /dev/null +++ b/src/test/xid-64/t/003_test_integrity.pl @@ -0,0 +1,58 @@ +# Check integrity after dump/restore with different xids +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use File::Compare; + +my $tempdir = PostgreSQL::Test::Utils::tempdir; +use bigint; + +my $START_VAL = 2**32; +my $MAX_VAL = 2**62; + +my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imxid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL)); + +# Initialize master node +my $node = PostgreSQL::Test::Cluster->new('master'); +$node->init(); +$node->start; + +# Create a database and fill it with the pgbench data +$node->safe_psql('postgres', "CREATE DATABASE pgbench_db"); +$node->command_ok( + [ qw(pgbench --initialize --scale=2 pgbench_db) ], + 'pgbench finished without errors'); +# Dump the database (cluster the main table to put data in a determined order) +$node->safe_psql('pgbench_db', qq( + CREATE INDEX pa_aid_idx ON pgbench_accounts (aid); + CLUSTER pgbench_accounts USING pa_aid_idx)); +$node->command_ok( + [ "pg_dump", "-w", "--inserts", "--file=$tempdir/pgbench.sql", "pgbench_db" ], + 'pgdump finished without errors'); +$node->stop('fast'); + +# Initialize second node +my $node2 = PostgreSQL::Test::Cluster->new('master2'); +$node2->init(extra => [ "--xid=$ixid", "--multixact-id=$imxid", 
"--multixact-offset=$imoff" ]); +# Disable logging of all statements to avoid log bloat during restore +$node2->append_conf('postgresql.conf', "log_statement = none"); +$node2->start; + +# Create a database and restore the previous dump +$node2->safe_psql('postgres', "CREATE DATABASE pgbench_db"); +my $txid0 = $node2->safe_psql('pgbench_db', 'SELECT txid_current()'); +print("# Initial txid_current: $txid0\n"); +$node2->command_ok(["psql", "-q", "-f", "$tempdir/pgbench.sql", "pgbench_db"]); + +# Dump the database and compare the dumped content with the previous one +$node2->safe_psql('pgbench_db', 'CLUSTER pgbench_accounts'); +$node2->command_ok( + [ "pg_dump", "-w", "--inserts", "--file=$tempdir/pgbench2.sql", "pgbench_db" ], + 'pgdump finished without errors'); +ok(File::Compare::compare_text("$tempdir/pgbench.sql", "$tempdir/pgbench2.sql") == 0, "no differences detected"); + +done_testing(); \ No newline at end of file diff --git a/src/test/xid-64/t/004_test_relminmxid.pl b/src/test/xid-64/t/004_test_relminmxid.pl new file mode 100644 index 00000000000..e1f6e556e53 --- /dev/null +++ b/src/test/xid-64/t/004_test_relminmxid.pl @@ -0,0 +1,90 @@ +# Check integrity after dump/restore with different xids +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use bigint; + +my ($node, $rmm, $vacout); +$node = PostgreSQL::Test::Cluster->new('master'); +$node->init(extra => [ "--xid=3", "--multixact-id=1", "--multixact-offset=0" ]); +$node->append_conf('postgresql.conf', 'max_prepared_transactions = 2'); +$node->start; + +sub relminmxid +{ + my $rmm = $node->safe_psql("postgres", qq( + SELECT relminmxid + FROM pg_class + WHERE relname = 'foo';)); + return $rmm + 0; +} + +sub vacuum +{ + my ($rc, $stdout, $stderr) = $node->psql("postgres", "VACUUM foo;"); + return $stdout.$stderr; +} + +sub gen_multixact +{ + $node->safe_psql("postgres", qq( + BEGIN; + SELECT * FROM foo FOR KEY SHARE; + PREPARE TRANSACTION 'fooshare'; + 
)); + + my $xmax = $node->safe_psql("postgres", qq( + SELECT xmax FROM foo; + )); + isnt($xmax + 0, 0, "xmax not empty"); + + $node->safe_psql("postgres", qq( + BEGIN; + SELECT * FROM foo FOR KEY SHARE; + COMMIT; + COMMIT PREPARED 'fooshare'; + )); + + my $mxact = $node->safe_psql("postgres", qq( + SELECT xmax FROM foo; + )); + isnt($mxact + 0, 0, "mxact not empty"); + cmp_ok($xmax, '>', $mxact, "xmax is greater than mxact"); +} + +# Initialize master node with the random xid-related parameters +$node->safe_psql("postgres", "CREATE TABLE foo (a int); INSERT INTO foo VALUES (1);"); + +is(relminmxid(), 1, "relminmxid is default"); + +vacuum(); +is(relminmxid(), 1, "relminmxid is still default"); + +gen_multixact(); +is(relminmxid(), 1, "relminmxid is still still default"); + +unlike(vacuum(), qr/multixact.*before relminmxid/, "no relminmxid error"); + +# Now intentionally break relminmxid +$node->safe_psql("postgres", qq( + UPDATE pg_class SET relminmxid = ((1::int8<<62) + 1)::text::xid + WHERE relname = 'foo' +)); +cmp_ok(relminmxid(), '>', 2**62, "relminmxid broken (intentionally)"); + +gen_multixact(); +like(vacuum(), qr/multixact.*before relminmxid/, "got relminmxid error"); +cmp_ok(relminmxid(), '>', 2**62, "relminmxid broken (still)"); + +# Fix relminmxid by setting to default +$node->safe_psql("postgres", qq( + UPDATE pg_class SET relminmxid = '1' + WHERE relname = 'foo' +)); +is(relminmxid(), 1, "relminmxid is default again"); + +unlike(vacuum(), qr/multixact.*before relminmxid/, "no relminmxid error again"); + +done_testing(); diff --git a/src/test/xid-64/t/005_stream_subxact.pl b/src/test/xid-64/t/005_stream_subxact.pl new file mode 100644 index 00000000000..1379af6816b --- /dev/null +++ b/src/test/xid-64/t/005_stream_subxact.pl @@ -0,0 +1,100 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Test xids streaming of large transaction containing large subtransactions +# near 32-bit boundary. 
+# +# Mostly it is a copy of 016_stream_subxact.pl, but with publisher xid inited +# just before 32-bit boundary, so if xids are replicated as 32-bit values, +# subscriber will get 0 xid value. +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Create publisher node +my $node_publisher = PostgreSQL::Test::Cluster->new('publisher'); +$node_publisher->init(allows_streaming => 'logical', extra => ['-x', '4294966545']); +$node_publisher->append_conf('postgresql.conf', + 'logical_decoding_work_mem = 64kB'); +$node_publisher->start; + +# Create subscriber node +my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber'); +$node_subscriber->init(allows_streaming => 'logical'); +$node_subscriber->start; + +# Create some preexisting content on publisher +$node_publisher->safe_psql('postgres', + "CREATE TABLE test_tab (a int primary key, b varchar)"); +$node_publisher->safe_psql('postgres', + "INSERT INTO test_tab VALUES (1, 'foo'), (2, 'bar')"); + +# Setup structure on subscriber +$node_subscriber->safe_psql('postgres', + "CREATE TABLE test_tab (a int primary key, b text, c timestamptz DEFAULT now(), d bigint DEFAULT 999)" +); + +# Setup logical replication +my $publisher_connstr = $node_publisher->connstr . 
' dbname=postgres'; +$node_publisher->safe_psql('postgres', + "CREATE PUBLICATION tap_pub FOR TABLE test_tab"); + +my $appname = 'tap_sub'; +$node_subscriber->safe_psql('postgres', + "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (streaming = on)" +); + +$node_publisher->wait_for_catchup($appname); + +# Also wait for initial table sync to finish +my $synced_query = + "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; +$node_subscriber->poll_query_until('postgres', $synced_query) + or die "Timed out while waiting for subscriber to synchronize data"; + +my $result = + $node_subscriber->safe_psql('postgres', + "SELECT count(*), count(c), count(d = 999) FROM test_tab"); +is($result, qq(2|2|2), 'check initial data was copied to subscriber'); + +# Insert, update and delete enough rows to exceed 64kB limit. +$node_publisher->safe_psql( + 'postgres', q{ +BEGIN; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series( 3, 500) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +SAVEPOINT s1; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(501, 1000) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +SAVEPOINT s2; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(1001, 1500) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +SAVEPOINT s3; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(1501, 2000) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +SAVEPOINT s4; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(2001, 2500) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +COMMIT; +}); + +$node_publisher->wait_for_catchup($appname); + +$result = + 
$node_subscriber->safe_psql('postgres', + "SELECT count(*), count(c), count(d = 999) FROM test_tab"); +is($result, qq(1667|1667|1667), + 'check data was copied to subscriber in streaming mode and extra columns contain local defaults' +); + +$node_subscriber->stop; +$node_publisher->stop; + +done_testing(); diff --git a/src/test/xid-64/t/006_zeropage.pl b/src/test/xid-64/t/006_zeropage.pl new file mode 100644 index 00000000000..fd3ac3973fa --- /dev/null +++ b/src/test/xid-64/t/006_zeropage.pl @@ -0,0 +1,33 @@ +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Check WAL for ZEROPAGE record. + +sub command_output +{ + my ($cmd) = @_; + my ($stdout, $stderr); + print("# Running: " . join(" ", @{$cmd}) . "\n"); + my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr; + return $stdout; +} + +my $node = PostgreSQL::Test::Cluster->new('main'); +$node->init(extra => [ "--xid=3", "--multixact-id=3", "--multixact-offset=0" ]); +$node->start; +my $pgdata = $node->data_dir; +my $xlogfilename0 = $node->safe_psql('postgres', + "SELECT pg_walfile_name(pg_current_wal_lsn())"); +#$node->command_like( +# [ 'pg_waldump', '-S', "$pgdata/pg_wal/$xlogfilename0" ], +# qr/ZEROPAGE/, +# 'pg_waldump prints start timestamp'); +my $wd_output = command_output( + [ 'pg_waldump', "$pgdata/pg_wal/$xlogfilename0" ]); +ok($wd_output =~ qr/ZEROPAGE page 0/, "ZEROPAGE found"); + +done_testing(); diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm index d30e8fcb117..80398a72402 100644 --- a/src/tools/msvc/Solution.pm +++ b/src/tools/msvc/Solution.pm @@ -463,6 +463,7 @@ sub GenerateFiles PACKAGE_TARNAME => lc qq{"$package_name"}, PACKAGE_URL => qq{"$package_url"}, PACKAGE_VERSION => qq{"$package_version"}, + XID_IS_64BIT => 1, PG_INT128_TYPE => undef, PG_INT64_TYPE => 'long long int', PG_KRB_SRVNAM => qq{"postgres"}, diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 
4fb746930aa..2ddb5db6f74 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3364,8 +3364,8 @@ intset_internal_node intset_leaf_node intset_node intvKEY -itemIdCompact -itemIdCompactData +ItemIdCompact +ItemIdCompactData iterator jmp_buf join_search_hook_type -- 2.24.3 (Apple Git-128)