From 5c7089dec450c63d0f8c4352307c675dff961402 Mon Sep 17 00:00:00 2001 From: Evgeny Voropaev Date: Wed, 26 Mar 2025 21:16:15 +0800 Subject: [PATCH v60 09/15] Use 64-bit XIDs - change TransactionId to 64bit - disk tuple format (HeapTupleHeader) is (almost) unchanged: xmin and xmax remains 32bit -- now 32bit xid is named ShortTransactionId - heap page format is changed to contain xid and multixact base values, tuple's xmin and xmax are offsets from corresponding bases. -- xid_base and multi_base are stored as a page special data. PageHeader remains unmodified. - in-memory tuple (HeapTuple) was enriched with precalculated 64bit xmin/xmax. Authors: - Alexander Korotkov - Teodor Sigaev - Nikita Glukhov - Maxim Orlov - Pavel Borisov - Yura Sokolov - Aleksander Alekseev - Evgeny Voropaev Discussion: https://www.postgresql.org/message-id/flat/CACG%3DezZe1NQSCnfHOr78AtAZxJZeCvxrts0ygrxYwe%3DpyyjVWA%40mail.gmail.com --- contrib/amcheck/verify_heapam.c | 120 +-- contrib/amcheck/verify_nbtree.c | 2 +- contrib/hstore/hstore_io.c | 2 + contrib/pageinspect/Makefile | 2 +- contrib/pageinspect/btreefuncs.c | 18 +- contrib/pageinspect/expected/btree.out | 4 +- contrib/pageinspect/expected/hash_1.out | 166 +++ .../pageinspect/expected/oldextversions.out | 10 +- contrib/pageinspect/expected/page.out | 28 +- contrib/pageinspect/heapfuncs.c | 10 +- .../pageinspect/pageinspect--1.13--1.14.sql | 145 +++ contrib/pageinspect/pageinspect--1.5.sql | 2 + contrib/pageinspect/rawpage.c | 35 +- contrib/pageinspect/sql/btree.sql | 3 +- contrib/pg_surgery/heap_surgery.c | 17 +- contrib/pg_visibility/pg_visibility.c | 7 +- contrib/pgrowlocks/pgrowlocks.c | 2 +- contrib/pgstattuple/pgstatapprox.c | 4 + contrib/pgstattuple/pgstatindex.c | 2 +- .../postgres_fdw/expected/postgres_fdw.out | 55 +- contrib/postgres_fdw/postgres_fdw.c | 9 +- contrib/postgres_fdw/sql/postgres_fdw.sql | 15 +- src/backend/access/common/heaptuple.c | 10 +- src/backend/access/common/tupdesc.c | 3 + 
src/backend/access/heap/heapam.c | 999 +++++++++++++++--- src/backend/access/heap/heapam_handler.c | 62 +- src/backend/access/heap/heapam_visibility.c | 173 +-- src/backend/access/heap/heapam_xlog.c | 161 ++- src/backend/access/heap/heaptoast.c | 3 + src/backend/access/heap/hio.c | 36 +- src/backend/access/heap/pruneheap.c | 131 ++- src/backend/access/heap/rewriteheap.c | 101 +- src/backend/access/heap/vacuumlazy.c | 145 +-- src/backend/access/nbtree/nbtpage.c | 2 + src/backend/access/nbtree/nbtsplitloc.c | 16 +- src/backend/access/rmgrdesc/gistdesc.c | 4 +- src/backend/access/rmgrdesc/heapdesc.c | 34 +- src/backend/access/rmgrdesc/nbtdesc.c | 4 +- src/backend/access/rmgrdesc/xactdesc.c | 6 +- src/backend/access/rmgrdesc/xlogdesc.c | 2 +- src/backend/access/transam/clog.c | 19 +- src/backend/access/transam/commit_ts.c | 19 - src/backend/access/transam/multixact.c | 52 +- src/backend/access/transam/slru.c | 11 +- src/backend/access/transam/subtrans.c | 13 +- src/backend/access/transam/transam.c | 18 +- src/backend/access/transam/twophase.c | 39 +- src/backend/access/transam/varsup.c | 177 +--- src/backend/access/transam/xact.c | 35 +- src/backend/access/transam/xlog.c | 9 +- src/backend/access/transam/xloginsert.c | 7 + src/backend/access/transam/xlogreader.c | 20 - src/backend/bootstrap/bootstrap.c | 2 +- src/backend/catalog/heap.c | 8 +- src/backend/catalog/pg_inherits.c | 2 +- src/backend/commands/async.c | 2 +- src/backend/commands/dbcommands.c | 9 +- src/backend/commands/indexcmds.c | 6 +- src/backend/commands/sequence.c | 30 +- src/backend/commands/vacuum.c | 20 +- src/backend/executor/execExprInterp.c | 1 + src/backend/executor/execTuples.c | 4 +- src/backend/executor/execUtils.c | 1 + src/backend/executor/nodeModifyTable.c | 1 + src/backend/executor/spi.c | 1 + src/backend/nodes/gen_node_support.pl | 6 +- src/backend/optimizer/util/plancat.c | 2 +- src/backend/postmaster/autovacuum.c | 64 +- src/backend/replication/logical/conflict.c | 20 +- 
src/backend/replication/logical/decode.c | 24 +- src/backend/replication/logical/proto.c | 50 +- .../replication/logical/reorderbuffer.c | 17 +- src/backend/replication/logical/slotsync.c | 2 +- src/backend/replication/logical/snapbuild.c | 4 +- src/backend/replication/logical/worker.c | 2 +- src/backend/replication/walreceiver.c | 28 +- src/backend/replication/walsender.c | 73 +- src/backend/statistics/extended_stats.c | 1 + src/backend/storage/buffer/Makefile | 3 +- src/backend/storage/buffer/bufmgr.c | 123 +++ src/backend/storage/buffer/heap_convert.c | 552 ++++++++++ src/backend/storage/buffer/meson.build | 1 + src/backend/storage/ipc/procarray.c | 186 ++-- src/backend/storage/ipc/standby.c | 4 +- src/backend/storage/lmgr/lmgr.c | 14 +- src/backend/storage/lmgr/lock.c | 4 +- src/backend/storage/lmgr/predicate.c | 31 +- src/backend/storage/lmgr/proc.c | 12 +- src/backend/storage/page/bufpage.c | 254 ++++- src/backend/utils/adt/enum.c | 2 +- src/backend/utils/adt/jsonfuncs.c | 2 + src/backend/utils/adt/lockfuncs.c | 9 +- src/backend/utils/adt/pgstatfuncs.c | 1 + src/backend/utils/adt/rowtypes.c | 12 + src/backend/utils/adt/xid.c | 37 +- src/backend/utils/adt/xid8funcs.c | 46 +- src/backend/utils/cache/catcache.c | 1 + src/backend/utils/cache/relcache.c | 3 +- src/backend/utils/fmgr/fmgr.c | 4 +- src/backend/utils/misc/guc_tables.c | 163 ++- src/backend/utils/misc/help_config.c | 8 +- src/backend/utils/misc/pg_controldata.c | 2 +- src/backend/utils/misc/postgresql.conf.sample | 2 +- src/backend/utils/sort/tuplesortvariants.c | 14 +- src/backend/utils/time/combocid.c | 20 +- src/backend/utils/time/snapmgr.c | 26 +- src/bin/pg_amcheck/t/004_verify_heapam.pl | 185 +++- src/bin/pg_controldata/pg_controldata.c | 2 +- src/bin/pg_dump/pg_dump.c | 27 +- src/bin/pg_dump/pg_dump.h | 8 +- src/bin/pg_resetwal/pg_resetwal.c | 53 +- src/bin/pg_resetwal/t/001_basic.pl | 18 +- src/bin/pg_upgrade/check.c | 149 ++- src/bin/pg_upgrade/controldata.c | 17 +- src/bin/pg_upgrade/file.c 
| 99 +- src/bin/pg_upgrade/pg_upgrade.c | 104 +- src/bin/pg_upgrade/pg_upgrade.h | 31 +- src/bin/pg_upgrade/relfilenumber.c | 34 +- src/bin/pg_upgrade/segresize.c | 70 ++ src/bin/pg_upgrade/t/002_pg_upgrade.pl | 18 + src/bin/pg_upgrade/version.c | 106 +- src/bin/pg_waldump/pg_waldump.c | 2 +- src/bin/pg_waldump/t/001_basic.pl | 3 +- src/include/access/ginblock.h | 11 +- src/include/access/gist.h | 2 +- src/include/access/heapam.h | 51 +- src/include/access/heapam_xlog.h | 29 +- src/include/access/heaptoast.h | 11 +- src/include/access/htup.h | 20 +- src/include/access/htup_details.h | 306 +++++- src/include/access/multixact.h | 8 +- src/include/access/nbtree.h | 10 + src/include/access/reloptions.h | 2 +- src/include/access/rewriteheap.h | 4 +- src/include/access/rmgrlist.h | 1 + src/include/access/slru.h | 10 +- src/include/access/tableam.h | 2 +- src/include/access/transam.h | 135 +-- src/include/access/tupmacs.h | 3 +- src/include/access/xact.h | 13 +- src/include/access/xloginsert.h | 1 + src/include/access/xlogreader.h | 4 - src/include/access/xlogrecord.h | 5 +- src/include/c.h | 21 +- src/include/catalog/catversion.h | 3 +- src/include/catalog/pg_operator.dat | 8 +- src/include/catalog/pg_proc.dat | 12 +- src/include/catalog/pg_type.dat | 4 +- src/include/catalog/pg_type.h | 5 + src/include/commands/vacuum.h | 22 +- src/include/fmgr.h | 2 + src/include/nodes/pg_list.h | 4 + src/include/pg_config.h.in | 3 + src/include/port/pg_lfind.h | 62 ++ src/include/postgres.h | 9 +- src/include/postmaster/autovacuum.h | 4 +- src/include/storage/buf_internals.h | 9 +- src/include/storage/bufmgr.h | 6 + src/include/storage/bufpage.h | 232 +++- src/include/storage/itemid.h | 2 + src/include/storage/lock.h | 14 +- src/include/storage/proc.h | 7 +- src/include/utils/combocid.h | 2 +- src/include/utils/xid8.h | 4 +- src/pl/plperl/plperl.c | 4 +- src/pl/plpgsql/src/pl_comp.c | 4 +- src/pl/plpgsql/src/pl_exec.c | 2 + src/pl/plpython/plpy_procedure.c | 4 +- src/pl/tcl/pltcl.c | 
4 +- src/test/Makefile | 3 +- src/test/meson.build | 1 + .../injection_points/regress_injection.c | 3 +- src/test/modules/test_lfind/test_lfind.c | 30 +- .../perl/PostgreSQL/Test/AdjustUpgrade.pm | 4 + src/test/perl/PostgreSQL/Test/Cluster.pm | 2 +- src/test/recovery/t/003_recovery_targets.pl | 2 +- src/test/recovery/t/039_end_of_wal.pl | 22 +- src/test/regress/expected/indirect_toast.out | 8 + src/test/regress/expected/insert.out | 16 +- src/test/regress/expected/opr_sanity.out | 6 +- src/test/regress/expected/select_views.out | 86 +- src/test/regress/expected/txid.out | 8 +- src/test/regress/expected/type_sanity.out | 5 +- src/test/regress/expected/xid.out | 22 +- src/test/regress/expected/xid64.out | 92 ++ src/test/regress/parallel_schedule | 2 +- src/test/regress/regress.c | 298 ++++++ src/test/regress/sql/indirect_toast.sql | 11 + src/test/regress/sql/insert.sql | 17 +- src/test/regress/sql/select_views.sql | 2 +- src/test/regress/sql/type_sanity.sql | 5 +- src/test/regress/sql/xid.sql | 2 +- src/test/regress/sql/xid64.sql | 84 ++ src/test/xid-64/.gitignore | 8 + src/test/xid-64/Makefile | 22 + src/test/xid-64/README | 16 + src/test/xid-64/meson.build | 16 + src/test/xid-64/t/002_test_gucs.pl | 79 ++ src/test/xid-64/t/003_test_integrity.pl | 58 + src/test/xid-64/t/004_test_relminmxid.pl | 90 ++ src/test/xid-64/t/005_stream_subxact.pl | 100 ++ src/test/xid-64/t/006_zeropage.pl | 33 + src/test/xid-64/t/007_first_multi.pl | 83 ++ src/tools/pgindent/typedefs.list | 4 +- 204 files changed, 6202 insertions(+), 1930 deletions(-) create mode 100644 contrib/pageinspect/expected/hash_1.out create mode 100644 contrib/pageinspect/pageinspect--1.13--1.14.sql create mode 100644 src/backend/storage/buffer/heap_convert.c create mode 100644 src/test/regress/expected/xid64.out create mode 100644 src/test/regress/sql/xid64.sql create mode 100644 src/test/xid-64/.gitignore create mode 100644 src/test/xid-64/Makefile create mode 100644 src/test/xid-64/README create mode 100644 
src/test/xid-64/meson.build create mode 100644 src/test/xid-64/t/002_test_gucs.pl create mode 100644 src/test/xid-64/t/003_test_integrity.pl create mode 100644 src/test/xid-64/t/004_test_relminmxid.pl create mode 100644 src/test/xid-64/t/005_stream_subxact.pl create mode 100644 src/test/xid-64/t/006_zeropage.pl create mode 100644 src/test/xid-64/t/007_first_multi.pl diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index cd33815c703..b030639e736 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -19,6 +19,7 @@ #include "access/toast_internals.h" #include "access/visibilitymap.h" #include "access/xact.h" +#include "catalog/catalog.h" #include "catalog/pg_am.h" #include "catalog/pg_class.h" #include "funcapi.h" @@ -89,7 +90,7 @@ typedef struct HeapCheckContext * them. */ FullTransactionId next_fxid; /* TransamVariables->nextXid */ - TransactionId next_xid; /* 32-bit version of next_fxid */ + TransactionId next_xid; /* 64-bit version of next_fxid */ TransactionId oldest_xid; /* TransamVariables->oldestXid */ FullTransactionId oldest_fxid; /* 64-bit version of oldest_xid, computed * relative to next_fxid */ @@ -130,6 +131,7 @@ typedef struct HeapCheckContext uint16 lp_len; uint16 lp_off; HeapTupleHeader tuphdr; + HeapTupleData tuple; int natts; /* Values for iterating over attributes within the tuple */ @@ -173,8 +175,6 @@ static bool check_tuple_visibility(HeapCheckContext *ctx, static void report_corruption(HeapCheckContext *ctx, char *msg); static void report_toast_corruption(HeapCheckContext *ctx, ToastedAttribute *ta, char *msg); -static FullTransactionId FullTransactionIdFromXidAndCtx(TransactionId xid, - const HeapCheckContext *ctx); static void update_cached_xid_range(HeapCheckContext *ctx); static void update_cached_mxid_range(HeapCheckContext *ctx); static XidBoundsViolation check_mxid_in_range(MultiXactId mxid, @@ -398,7 +398,7 @@ verify_heapam(PG_FUNCTION_ARGS) update_cached_xid_range(&ctx); 
update_cached_mxid_range(&ctx); ctx.relfrozenxid = ctx.rel->rd_rel->relfrozenxid; - ctx.relfrozenfxid = FullTransactionIdFromXidAndCtx(ctx.relfrozenxid, &ctx); + ctx.relfrozenfxid = FullTransactionIdFromXid(ctx.relfrozenxid); ctx.relminmxid = ctx.rel->rd_rel->relminmxid; if (TransactionIdIsNormal(ctx.relfrozenxid)) @@ -559,6 +559,12 @@ verify_heapam(PG_FUNCTION_ARGS) ctx.tuphdr = (HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid); ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr); + ctx.tuple.t_data = ctx.tuphdr; + ctx.tuple.t_len = ItemIdGetLength(ctx.itemid); + ctx.tuple.t_tableOid = RelationGetRelid(ctx.rel); + HeapTupleCopyXidsFromPage(ctx.buffer, &ctx.tuple, ctx.page, + IsToastRelation(ctx.rel)); + /* Ok, ready to check this next tuple */ check_tuple(&ctx, &xmin_commit_status_ok[ctx.offnum], @@ -592,6 +598,8 @@ verify_heapam(PG_FUNCTION_ARGS) TransactionId curr_xmax; TransactionId next_xmin; OffsetNumber nextoffnum = successor[ctx.offnum]; + HeapTupleData curr_tup; + HeapTupleData next_tup; /* * The current line pointer may not have a successor, either @@ -654,9 +662,13 @@ verify_heapam(PG_FUNCTION_ARGS) if (ItemIdIsRedirected(next_lp)) continue; curr_htup = (HeapTupleHeader) PageGetItem(ctx.page, curr_lp); - curr_xmax = HeapTupleHeaderGetUpdateXid(curr_htup); + curr_tup.t_data = curr_htup; + HeapTupleCopyXidsFromPage(ctx.buffer, &curr_tup, ctx.page, false); + curr_xmax = HeapTupleGetUpdateXidAny(&curr_tup); next_htup = (HeapTupleHeader) PageGetItem(ctx.page, next_lp); - next_xmin = HeapTupleHeaderGetXmin(next_htup); + next_tup.t_data = next_htup; + HeapTupleCopyXidsFromPage(ctx.buffer, &next_tup, ctx.page, false); + next_xmin = HeapTupleGetXmin(&next_tup); if (!TransactionIdIsValid(curr_xmax) || !TransactionIdEquals(curr_xmax, next_xmin)) continue; @@ -710,7 +722,7 @@ verify_heapam(PG_FUNCTION_ARGS) * xmin. This should be safe because the xmin itself can't have * changed, only its commit status. 
*/ - curr_xmin = HeapTupleHeaderGetXmin(curr_htup); + curr_xmin = HeapTupleGetXmin(&curr_tup); if (xmin_commit_status_ok[ctx.offnum] && xmin_commit_status[ctx.offnum] == XID_IN_PROGRESS && xmin_commit_status_ok[nextoffnum] && @@ -908,7 +920,7 @@ check_tuple_header(HeapCheckContext *ctx) { HeapTupleHeader tuphdr = ctx->tuphdr; uint16 infomask = tuphdr->t_infomask; - TransactionId curr_xmax = HeapTupleHeaderGetUpdateXid(tuphdr); + TransactionId curr_xmax = HeapTupleGetUpdateXidAny(&ctx->tuple); bool result = true; unsigned expected_hoff; @@ -1026,13 +1038,14 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, XidCommitStatus xmin_status; XidCommitStatus xvac_status; XidCommitStatus xmax_status; + HeapTuple tuple = &ctx->tuple; HeapTupleHeader tuphdr = ctx->tuphdr; ctx->tuple_could_be_pruned = true; /* have not yet proven otherwise */ *xmin_commit_status_ok = false; /* have not yet proven otherwise */ /* If xmin is normal, it should be within valid range */ - xmin = HeapTupleHeaderGetXmin(tuphdr); + xmin = HeapTupleGetXmin(tuple); switch (get_xid_status(xmin, ctx, &xmin_status)) { case XID_INVALID: @@ -1046,19 +1059,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, report_corruption(ctx, psprintf("xmin %llu equals or exceeds next valid transaction ID %llu", (unsigned long long) xmin, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return false; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("xmin %llu precedes oldest valid transaction ID %llu", (unsigned long long) xmin, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return false; case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("xmin %llu precedes relation freeze threshold %llu", (unsigned long long) xmin, - (unsigned long long) 
U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid))); return false; } @@ -1084,19 +1097,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple equals or exceeds next valid transaction ID %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return false; case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple precedes relation freeze threshold %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid))); return false; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved off tuple precedes oldest valid transaction ID %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return false; case XID_BOUNDS_OK: break; @@ -1150,19 +1163,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple equals or exceeds next valid transaction ID %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return false; case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple precedes relation freeze threshold %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) 
XidFromFullTransactionId(ctx->relfrozenfxid))); return false; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("old-style VACUUM FULL transaction ID %llu for moved in tuple precedes oldest valid transaction ID %llu", (unsigned long long) xvac, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return false; case XID_BOUNDS_OK: break; @@ -1239,7 +1252,7 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, * HEAP_XMAX_IS_LOCKED_ONLY is true, but for now we err on the side of * avoiding possibly-bogus complaints about missing TOAST entries. */ - xmax = HeapTupleHeaderGetRawXmax(tuphdr); + xmax = HeapTupleGetRawXmax(tuple); switch (check_mxid_valid_in_rel(xmax, ctx)) { case XID_INVALID: @@ -1298,7 +1311,7 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, * We already checked above that this multixact is within limits for * this table. Now check the update xid from this multixact. 
*/ - xmax = HeapTupleGetUpdateXid(tuphdr); + xmax = HeapTupleGetUpdateXid(tuple); switch (get_xid_status(xmax, ctx, &xmax_status)) { case XID_INVALID: @@ -1310,19 +1323,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, report_corruption(ctx, psprintf("update xid %llu equals or exceeds next valid transaction ID %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return true; case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("update xid %llu precedes relation freeze threshold %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid))); return true; case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("update xid %llu precedes oldest valid transaction ID %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return true; case XID_BOUNDS_OK: break; @@ -1362,7 +1375,7 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, } /* xmax is an XID, not a MXID. Sanity check it. 
*/ - xmax = HeapTupleHeaderGetRawXmax(tuphdr); + xmax = HeapTupleGetRawXmax(tuple); switch (get_xid_status(xmax, ctx, &xmax_status)) { case XID_INVALID: @@ -1372,19 +1385,19 @@ check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok, report_corruption(ctx, psprintf("xmax %llu equals or exceeds next valid transaction ID %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->next_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->next_fxid))); return false; /* corrupt */ case XID_PRECEDES_RELMIN: report_corruption(ctx, psprintf("xmax %llu precedes relation freeze threshold %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->relfrozenfxid))); + (unsigned long long) XidFromFullTransactionId(ctx->relfrozenfxid))); return false; /* corrupt */ case XID_PRECEDES_CLUSTERMIN: report_corruption(ctx, psprintf("xmax %llu precedes oldest valid transaction ID %llu", (unsigned long long) xmax, - (unsigned long long) U64FromFullTransactionId(ctx->oldest_fxid))); + (unsigned long long) XidFromFullTransactionId(ctx->oldest_fxid))); return false; /* corrupt */ case XID_BOUNDS_OK: break; @@ -1861,51 +1874,6 @@ check_tuple(HeapCheckContext *ctx, bool *xmin_commit_status_ok, ctx->attnum = -1; } -/* - * Convert a TransactionId into a FullTransactionId using our cached values of - * the valid transaction ID range. It is the caller's responsibility to have - * already updated the cached values, if necessary. This is akin to - * FullTransactionIdFromAllowableAt(), but it tolerates corruption in the form - * of an xid before epoch 0. 
- */ -static FullTransactionId -FullTransactionIdFromXidAndCtx(TransactionId xid, const HeapCheckContext *ctx) -{ - uint64 nextfxid_i; - int32 diff; - FullTransactionId fxid; - - Assert(TransactionIdIsNormal(ctx->next_xid)); - Assert(FullTransactionIdIsNormal(ctx->next_fxid)); - Assert(XidFromFullTransactionId(ctx->next_fxid) == ctx->next_xid); - - if (!TransactionIdIsNormal(xid)) - return FullTransactionIdFromEpochAndXid(0, xid); - - nextfxid_i = U64FromFullTransactionId(ctx->next_fxid); - - /* compute the 32bit modulo difference */ - diff = (int32) (ctx->next_xid - xid); - - /* - * In cases of corruption we might see a 32bit xid that is before epoch 0. - * We can't represent that as a 64bit xid, due to 64bit xids being - * unsigned integers, without the modulo arithmetic of 32bit xid. There's - * no really nice way to deal with that, but it works ok enough to use - * FirstNormalFullTransactionId in that case, as a freshly initdb'd - * cluster already has a newer horizon. - */ - if (diff > 0 && (nextfxid_i - FirstNormalTransactionId) < (int64) diff) - { - Assert(EpochFromFullTransactionId(ctx->next_fxid) == 0); - fxid = FirstNormalFullTransactionId; - } - else - fxid = FullTransactionIdFromU64(nextfxid_i - diff); - - Assert(FullTransactionIdIsNormal(fxid)); - return fxid; -} /* * Update our cached range of valid transaction IDs. 
@@ -1920,8 +1888,8 @@ update_cached_xid_range(HeapCheckContext *ctx) LWLockRelease(XidGenLock); /* And compute alternate versions of the same */ + ctx->oldest_fxid = FullTransactionIdFromXid(ctx->oldest_xid); ctx->next_xid = XidFromFullTransactionId(ctx->next_fxid); - ctx->oldest_fxid = FullTransactionIdFromXidAndCtx(ctx->oldest_xid, ctx); } /* @@ -2020,7 +1988,7 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx, } /* Check if the xid is within bounds */ - fxid = FullTransactionIdFromXidAndCtx(xid, ctx); + fxid = FullTransactionIdFromXid(xid); if (!fxid_in_cached_range(fxid, ctx)) { /* @@ -2029,7 +1997,6 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx, * performed the full xid conversion, reconvert. */ update_cached_xid_range(ctx); - fxid = FullTransactionIdFromXidAndCtx(xid, ctx); } if (FullTransactionIdPrecedesOrEquals(ctx->next_fxid, fxid)) @@ -2053,8 +2020,7 @@ get_xid_status(TransactionId xid, HeapCheckContext *ctx, *status = XID_COMMITTED; LWLockAcquire(XactTruncationLock, LW_SHARED); clog_horizon = - FullTransactionIdFromXidAndCtx(TransamVariables->oldestClogXid, - ctx); + FullTransactionIdFromXid(TransamVariables->oldestClogXid); if (FullTransactionIdPrecedesOrEquals(clog_horizon, fxid)) { if (TransactionIdIsCurrentTransactionId(xid)) diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index 825b677c47c..8c52e0d3225 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -581,7 +581,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace, * avoid this. 
*/ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data), + !TransactionIdPrecedes(HeapTupleGetXmin(rel->rd_indextuple), snapshot->xmin)) ereport(ERROR, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c index 2125436e40c..9fe4254e72e 100644 --- a/contrib/hstore/hstore_io.c +++ b/contrib/hstore/hstore_io.c @@ -914,6 +914,7 @@ hstore_from_record(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; + HeapTupleSetZeroXids(&tuple); values = (Datum *) palloc(ncolumns * sizeof(Datum)); nulls = (bool *) palloc(ncolumns * sizeof(bool)); @@ -1067,6 +1068,7 @@ hstore_populate_record(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = rec; + HeapTupleSetZeroXids(&tuple); } /* diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile index 9dee7653310..063c38b462c 100644 --- a/contrib/pageinspect/Makefile +++ b/contrib/pageinspect/Makefile @@ -13,7 +13,7 @@ OBJS = \ rawpage.o EXTENSION = pageinspect -DATA = pageinspect--1.12--1.13.sql \ +DATA = pageinspect--1.13--1.14.sql pageinspect--1.12--1.13.sql \ pageinspect--1.11--1.12.sql pageinspect--1.10--1.11.sql \ pageinspect--1.9--1.10.sql pageinspect--1.8--1.9.sql \ pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \ diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c index afa1947fad6..a810be45308 100644 --- a/contrib/pageinspect/btreefuncs.c +++ b/contrib/pageinspect/btreefuncs.c @@ -122,6 +122,9 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) stat->page_size = PageGetPageSize(page); + stat->btpo_prev = opaque->btpo_prev; + stat->btpo_level = opaque->btpo_level; + /* page type (flags) */ if (P_ISDELETED(opaque)) { @@ -143,11 +146,18 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat 
*stat) FullTransactionId safexid = BTPageGetDeleteXid(page); elog(DEBUG2, "deleted page from block %u has safexid %llu", - blkno, (unsigned long long) U64FromFullTransactionId(safexid)); + blkno, (unsigned long long) XidFromFullTransactionId(safexid)); } else - elog(DEBUG2, "deleted page from block %u has safexid %llu", - blkno, (unsigned long long) opaque->btpo_level); + { + ShortTransactionId safexid = BTP_GET_XACT(opaque); + + stat->btpo_prev = 0; + stat->btpo_level = 0; + + elog(DEBUG2, "deleted page from block %u has safexid %u", + blkno, safexid); + } /* Don't interpret BTDeletedPageData as index tuples */ maxoff = InvalidOffsetNumber; @@ -162,9 +172,7 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) stat->type = 'i'; /* btpage opaque data */ - stat->btpo_prev = opaque->btpo_prev; stat->btpo_next = opaque->btpo_next; - stat->btpo_level = opaque->btpo_level; stat->btpo_flags = opaque->btpo_flags; stat->btpo_cycleid = opaque->btpo_cycleid; diff --git a/contrib/pageinspect/expected/btree.out b/contrib/pageinspect/expected/btree.out index 0aa5d73322f..f5b05dbc063 100644 --- a/contrib/pageinspect/expected/btree.out +++ b/contrib/pageinspect/expected/btree.out @@ -207,8 +207,8 @@ SELECT bt_page_items('aaa'::bytea); ERROR: invalid page size -- invalid special area size CREATE INDEX test1_a_brin ON test1 USING brin(a); -SELECT bt_page_items(get_raw_page('test1', 0)); -ERROR: input page is not a valid btree page +-- XXX: false positive in 64xids due to equal sizes of BTPageOpaque and HeapPageSpecialData +-- SELECT bt_page_items(get_raw_page('test1', 0)); SELECT bt_page_items(get_raw_page('test1_a_brin', 0)); ERROR: input page is not a valid btree page \set VERBOSITY default diff --git a/contrib/pageinspect/expected/hash_1.out b/contrib/pageinspect/expected/hash_1.out new file mode 100644 index 00000000000..5e64eb92602 --- /dev/null +++ b/contrib/pageinspect/expected/hash_1.out @@ -0,0 +1,166 @@ +CREATE TABLE test_hash (a int, b text); +INSERT 
INTO test_hash VALUES (1, 'one'); +CREATE INDEX test_hash_a_idx ON test_hash USING hash (a); +\x +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 0)); +-[ RECORD 1 ]--+--------- +hash_page_type | metapage + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 1)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 2)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 3)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 4)); +-[ RECORD 1 ]--+------- +hash_page_type | bucket + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 5)); +-[ RECORD 1 ]--+------- +hash_page_type | bitmap + +SELECT hash_page_type(get_raw_page('test_hash_a_idx', 6)); +ERROR: block number 6 is out of range for relation "test_hash_a_idx" +SELECT * FROM hash_bitmap_info('test_hash_a_idx', -1); +ERROR: invalid block number +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 0); +ERROR: invalid overflow block number 0 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 1); +ERROR: invalid overflow block number 1 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 2); +ERROR: invalid overflow block number 2 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 3); +ERROR: invalid overflow block number 3 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 4); +ERROR: invalid overflow block number 4 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 5); +ERROR: invalid overflow block number 5 +SELECT * FROM hash_bitmap_info('test_hash_a_idx', 6); +ERROR: block number 6 is out of range for relation "test_hash_a_idx" +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 0)); +-[ RECORD 1 
]-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------- +magic | 105121344 +version | 4 +ntuples | 1 +bsize | 8156 +bmsize | 4096 +bmshift | 15 +maxbucket | 3 +highmask | 7 +lowmask | 3 +ovflpoint | 2 +firstfree | 0 +nmaps | 1 +procid | 450 +spares | {0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} +mapp | {5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} + +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 1)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 2)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 3)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 4)); +ERROR: page is not a hash meta page +SELECT magic, version, ntuples, bsize, bmsize, bmshift, maxbucket, highmask, +lowmask, ovflpoint, firstfree, nmaps, procid, spares, mapp FROM +hash_metapage_info(get_raw_page('test_hash_a_idx', 5)); +ERROR: page is not a hash meta page +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 0)); +ERROR: page is not a hash bucket or overflow page +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, 
+hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 1)); +-[ RECORD 1 ]---+----------- +live_items | 0 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 0 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 2)); +-[ RECORD 1 ]---+----------- +live_items | 0 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 1 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 3)); +-[ RECORD 1 ]---+----------- +live_items | 1 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 2 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 4)); +-[ RECORD 1 ]---+----------- +live_items | 0 +dead_items | 0 +page_size | 8192 +hasho_prevblkno | 3 +hasho_nextblkno | 4294967295 +hasho_bucket | 3 +hasho_flag | 2 +hasho_page_id | 65408 + +SELECT live_items, dead_items, page_size, hasho_prevblkno, hasho_nextblkno, +hasho_bucket, hasho_flag, hasho_page_id FROM +hash_page_stats(get_raw_page('test_hash_a_idx', 5)); +ERROR: page is not a hash bucket or overflow page +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 0)); +ERROR: page is not a hash bucket or overflow page +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 1)); +(0 rows) + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 2)); +(0 rows) + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 3)); +-[ RECORD 1 ]---------- 
+itemoffset | 1 +ctid | (0,1) +data | 2389907270 + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 4)); +(0 rows) + +SELECT * FROM hash_page_items(get_raw_page('test_hash_a_idx', 5)); +ERROR: page is not a hash bucket or overflow page +DROP TABLE test_hash; diff --git a/contrib/pageinspect/expected/oldextversions.out b/contrib/pageinspect/expected/oldextversions.out index 2910891ece7..aeda3bdb522 100644 --- a/contrib/pageinspect/expected/oldextversions.out +++ b/contrib/pageinspect/expected/oldextversions.out @@ -40,16 +40,16 @@ SELECT * FROM bt_page_items('test1_a_idx', 1); -- pagesize in pageinspect >= 1.10. ALTER EXTENSION pageinspect UPDATE TO '1.9'; \df page_header - List of functions - Schema | Name | Result data type | Argument data types | Type ---------+-------------+------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------ - public | page_header | record | page bytea, OUT lsn pg_lsn, OUT checksum smallint, OUT flags smallint, OUT lower smallint, OUT upper smallint, OUT special smallint, OUT pagesize smallint, OUT version smallint, OUT prune_xid xid | func + List of functions + Schema | Name | Result data type | Argument data types | Type +--------+-------------+------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------ + public | page_header | record | page bytea, OUT lsn pg_lsn, OUT checksum smallint, OUT flags smallint, OUT lower smallint, OUT upper smallint, OUT special smallint, OUT pagesize smallint, OUT version smallint, OUT xid_base xid, OUT multi_base xid, OUT prune_xid xid | func (1 row) SELECT pagesize, version FROM page_header(get_raw_page('test1', 0)); pagesize | 
version ----------+--------- - 8192 | 4 + 8192 | 5 (1 row) -- brin_page_items() added a new "empty" flag in 1.12, make sure we detect diff --git a/contrib/pageinspect/expected/page.out b/contrib/pageinspect/expected/page.out index e42fd9747fd..88e6ed1eca1 100644 --- a/contrib/pageinspect/expected/page.out +++ b/contrib/pageinspect/expected/page.out @@ -49,7 +49,7 @@ SELECT get_raw_page('test1', 0) = get_raw_page('test1', 'main', 0); SELECT pagesize, version FROM page_header(get_raw_page('test1', 0)); pagesize | version ----------+--------- - 8192 | 4 + 8192 | 5 (1 row) SELECT page_checksum(get_raw_page('test1', 0), 0) IS NOT NULL AS silly_checksum_test; @@ -70,19 +70,19 @@ SELECT tuple_data_split('test1'::regclass, t_data, t_infomask, t_infomask2, t_bi SELECT * FROM fsm_page_contents(get_raw_page('test1', 'fsm', 0)); fsm_page_contents ------------------- - 0: 254 + - 1: 254 + - 3: 254 + - 7: 254 + - 15: 254 + - 31: 254 + - 63: 254 + - 127: 254 + - 255: 254 + - 511: 254 + - 1023: 254 + - 2047: 254 + - 4095: 254 + + 0: 253 + + 1: 253 + + 3: 253 + + 7: 253 + + 15: 253 + + 31: 253 + + 63: 253 + + 127: 253 + + 255: 253 + + 511: 253 + + 1023: 253 + + 2047: 253 + + 4095: 253 + fp_next_slot: 0 + (1 row) diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c index aa8e1bd6df4..cbc7d0db719 100644 --- a/contrib/pageinspect/heapfuncs.c +++ b/contrib/pageinspect/heapfuncs.c @@ -32,6 +32,7 @@ #include "funcapi.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "pageinspect.h" #include "port/pg_bitutils.h" #include "utils/array.h" #include "utils/builtins.h" @@ -163,7 +164,7 @@ heap_page_items(PG_FUNCTION_ARGS) inter_call_data->tupd = tupdesc; inter_call_data->offset = FirstOffsetNumber; - inter_call_data->page = VARDATA(raw_page); + inter_call_data->page = get_page_from_raw(raw_page); fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page); fctx->user_fctx = inter_call_data; @@ -211,6 +212,7 @@ heap_page_items(PG_FUNCTION_ARGS) lp_offset 
== MAXALIGN(lp_offset) && lp_offset + lp_len <= raw_page_size) { + HeapTupleData tup; HeapTupleHeader tuphdr; bytea *tuple_data_bytea; int tuple_data_len; @@ -218,9 +220,11 @@ heap_page_items(PG_FUNCTION_ARGS) /* Extract information from the tuple header */ tuphdr = (HeapTupleHeader) PageGetItem(page, id); + tup.t_data = tuphdr; + HeapTupleCopyXidsFromPage(InvalidBuffer, &tup, page, false); - values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr)); - values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr)); + values[4] = TransactionIdGetDatum(HeapTupleGetXmin(&tup)); + values[5] = TransactionIdGetDatum(HeapTupleGetRawXmax(&tup)); /* shared with xvac */ values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr)); values[7] = PointerGetDatum(&tuphdr->t_ctid); diff --git a/contrib/pageinspect/pageinspect--1.13--1.14.sql b/contrib/pageinspect/pageinspect--1.13--1.14.sql new file mode 100644 index 00000000000..79cb1f8ef9d --- /dev/null +++ b/contrib/pageinspect/pageinspect--1.13--1.14.sql @@ -0,0 +1,145 @@ +/* contrib/pageinspect/pageinspect--1.13--1.14.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.13'" to load this file. 
\quit + +-- +-- gist_page_opaque_info() +-- +DROP FUNCTION gist_page_opaque_info(bytea); +CREATE FUNCTION gist_page_opaque_info(IN page bytea, + OUT lsn pg_lsn, + OUT nsn pg_lsn, + OUT rightlink bigint, + OUT flags text[]) +AS 'MODULE_PATHNAME', 'gist_page_opaque_info' +LANGUAGE C STRICT PARALLEL SAFE; + + +-- +-- gist_page_items_bytea() +-- +DROP FUNCTION gist_page_items_bytea(bytea); +CREATE FUNCTION gist_page_items_bytea(IN page bytea, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT dead boolean, + OUT key_data bytea) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gist_page_items_bytea' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- gist_page_items() +-- +DROP FUNCTION gist_page_items(bytea, regclass); +CREATE FUNCTION gist_page_items(IN page bytea, + IN index_oid regclass, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT dead boolean, + OUT keys text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'gist_page_items' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- get_raw_page() +-- +DROP FUNCTION get_raw_page(text, int8); +DROP FUNCTION IF EXISTS get_raw_page(text, int4); +CREATE FUNCTION get_raw_page(text, int8) +RETURNS bytea +AS 'MODULE_PATHNAME', 'get_raw_page_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +DROP FUNCTION get_raw_page(text, text, int8); +DROP FUNCTION IF EXISTS get_raw_page(text, text, int4); +CREATE FUNCTION get_raw_page(text, text, int8) +RETURNS bytea +AS 'MODULE_PATHNAME', 'get_raw_page_fork_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- page_checksum() +-- +DROP FUNCTION page_checksum(IN page bytea, IN blkno int8); +DROP FUNCTION IF EXISTS page_checksum(IN page bytea, IN blkno int4); +CREATE FUNCTION page_checksum(IN page bytea, IN blkno int8) +RETURNS smallint +AS 'MODULE_PATHNAME', 'page_checksum_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- bt_metap() +-- +DROP FUNCTION bt_metap(text); +CREATE FUNCTION bt_metap(IN relname text, + OUT magic int4, + OUT version int4, + OUT root int8, + OUT level 
int8, + OUT fastroot int8, + OUT fastlevel int8, + OUT last_cleanup_num_delpages int8, + OUT last_cleanup_num_tuples float8, + OUT allequalimage boolean) +AS 'MODULE_PATHNAME', 'bt_metap' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- bt_page_stats() +-- +DROP FUNCTION bt_page_stats(text, int8); +DROP FUNCTION IF EXISTS bt_page_stats(text, int4); +CREATE FUNCTION bt_page_stats(IN relname text, IN blkno int8, + OUT blkno int8, + OUT type "char", + OUT live_items int4, + OUT dead_items int4, + OUT avg_item_size int4, + OUT page_size int4, + OUT free_size int4, + OUT btpo_prev int8, + OUT btpo_next int8, + OUT btpo_level int8, + OUT btpo_flags int4) +AS 'MODULE_PATHNAME', 'bt_page_stats_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- bt_page_items() +-- +DROP FUNCTION bt_page_items(text, int8); +DROP FUNCTION IF EXISTS bt_page_items(text, int4); +CREATE FUNCTION bt_page_items(IN relname text, IN blkno int8, + OUT itemoffset smallint, + OUT ctid tid, + OUT itemlen smallint, + OUT nulls bool, + OUT vars bool, + OUT data text, + OUT dead boolean, + OUT htid tid, + OUT tids tid[]) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'bt_page_items_1_9' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- brin_page_items() +-- +DROP FUNCTION brin_page_items(IN page bytea, IN index_oid regclass); +CREATE FUNCTION brin_page_items(IN page bytea, IN index_oid regclass, + OUT itemoffset int, + OUT blknum int8, + OUT attnum int, + OUT allnulls bool, + OUT hasnulls bool, + OUT placeholder bool, + OUT value text) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'brin_page_items' +LANGUAGE C STRICT PARALLEL SAFE; diff --git a/contrib/pageinspect/pageinspect--1.5.sql b/contrib/pageinspect/pageinspect--1.5.sql index 1e40c3c97e2..fdbd2995a22 100644 --- a/contrib/pageinspect/pageinspect--1.5.sql +++ b/contrib/pageinspect/pageinspect--1.5.sql @@ -28,6 +28,8 @@ CREATE FUNCTION page_header(IN page bytea, OUT special smallint, OUT pagesize smallint, OUT version smallint, + OUT xid_base xid, + OUT multi_base xid, 
OUT prune_xid xid) AS 'MODULE_PATHNAME', 'page_header' LANGUAGE C STRICT PARALLEL SAFE; diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c index 617dff821a6..507885b1e9a 100644 --- a/contrib/pageinspect/rawpage.c +++ b/contrib/pageinspect/rawpage.c @@ -17,6 +17,7 @@ #include "access/htup_details.h" #include "access/relation.h" +#include "commands/sequence.h" #include "catalog/namespace.h" #include "catalog/pg_type.h" #include "funcapi.h" @@ -251,8 +252,9 @@ page_header(PG_FUNCTION_ARGS) Datum result; HeapTuple tuple; - Datum values[9]; - bool nulls[9]; + Datum values[11]; + bool nulls[11]; + bool is_toast; Page page; PageHeader pageheader; @@ -314,12 +316,37 @@ page_header(PG_FUNCTION_ARGS) } values[7] = UInt16GetDatum(PageGetPageLayoutVersion(page)); - values[8] = TransactionIdGetDatum(pageheader->pd_prune_xid); + is_toast = PageGetSpecialSize(page) == + MAXALIGN(sizeof(ToastPageSpecialData)); + values[8] = TransactionIdGetDatum(HeapPageGetPruneXidNoAssert((Page) page, + is_toast)); /* Build and return the tuple. 
*/ - memset(nulls, 0, sizeof(nulls)); + if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HeapPageSpecialData))) + { + /* Heap page */ + HeapPageSpecial pageSpecial = HeapPageGetSpecial((Page) page); + + values[9] = TransactionIdGetDatum(pageSpecial->pd_xid_base); + values[10] = TransactionIdGetDatum(pageSpecial->pd_multi_base); + } + else if (PageGetSpecialSize(page) == MAXALIGN(sizeof(ToastPageSpecialData))) + { + /* TOAST page */ + ToastPageSpecial pageSpecial = ToastPageGetSpecial((Page) page); + + values[9] = TransactionIdGetDatum(pageSpecial->pd_xid_base); + nulls[10] = true; + } + else + { + /* Double xmax page */ + nulls[9] = true; + nulls[10] = true; + } + tuple = heap_form_tuple(tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); diff --git a/contrib/pageinspect/sql/btree.sql b/contrib/pageinspect/sql/btree.sql index 102ebdefe3c..87f202fb9f4 100644 --- a/contrib/pageinspect/sql/btree.sql +++ b/contrib/pageinspect/sql/btree.sql @@ -51,7 +51,8 @@ SELECT bt_page_items(get_raw_page('test1_b_gist', 0)); SELECT bt_page_items('aaa'::bytea); -- invalid special area size CREATE INDEX test1_a_brin ON test1 USING brin(a); -SELECT bt_page_items(get_raw_page('test1', 0)); +-- XXX: false positive in 64xids due to equal sizes of BTPageOpaque and HeapPageSpecialData +-- SELECT bt_page_items(get_raw_page('test1', 0)); SELECT bt_page_items(get_raw_page('test1_a_brin', 0)); \set VERBOSITY default diff --git a/contrib/pg_surgery/heap_surgery.c b/contrib/pg_surgery/heap_surgery.c index 5b94b3d523e..7dd5ed44fdb 100644 --- a/contrib/pg_surgery/heap_surgery.c +++ b/contrib/pg_surgery/heap_surgery.c @@ -16,6 +16,7 @@ #include "access/relation.h" #include "access/visibilitymap.h" #include "access/xloginsert.h" +#include "catalog/catalog.h" #include "catalog/pg_am_d.h" #include "miscadmin.h" #include "storage/bufmgr.h" @@ -272,11 +273,20 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt) else { HeapTupleHeader htup; + HeapTupleData tuple; + 
bool is_toast; Assert(heap_force_opt == HEAP_FORCE_FREEZE); + is_toast = IsToastRelation(rel); + htup = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_data = htup; + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyXidsFromPage(buf, &tuple, page, is_toast); + /* * Reset all visibility-related fields of the tuple. This * logic should mimic heap_execute_freeze_tuple(), but we @@ -284,8 +294,11 @@ heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt) * potentially-garbled data is left behind. */ ItemPointerSet(&htup->t_ctid, blkno, curoff); - HeapTupleHeaderSetXmin(htup, FrozenTransactionId); - HeapTupleHeaderSetXmax(htup, InvalidTransactionId); + HeapTupleAndHeaderSetXmin(page, &tuple, FrozenTransactionId, + is_toast); + HeapTupleAndHeaderSetXmax(page, &tuple, InvalidTransactionId, + is_toast); + if (htup->t_infomask & HEAP_MOVED) { if (htup->t_infomask & HEAP_MOVED_OFF) diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c index 7f268a18a74..1cab4726a58 100644 --- a/contrib/pg_visibility/pg_visibility.c +++ b/contrib/pg_visibility/pg_visibility.c @@ -14,6 +14,7 @@ #include "access/htup_details.h" #include "access/visibilitymap.h" #include "access/xloginsert.h" +#include "catalog/catalog.h" #include "catalog/pg_type.h" #include "catalog/storage_xlog.h" #include "funcapi.h" @@ -811,6 +812,8 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = relid; + HeapTupleCopyXidsFromPage(buffer, &tuple, page, + IsToastRelation(rel)); /* * If we're checking whether the page is all-visible, we expect @@ -854,7 +857,7 @@ collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen) */ if (check_frozen) { - if (heap_tuple_needs_eventual_freeze(tuple.t_data)) + if (heap_tuple_needs_eventual_freeze(&tuple)) 
record_corrupt_item(items, &tuple.t_self); } } @@ -920,7 +923,7 @@ tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer) * be set here. So just check the xmin. */ - xmin = HeapTupleHeaderGetXmin(tup->t_data); + xmin = HeapTupleGetXmin(tup); if (!TransactionIdPrecedes(xmin, OldestXmin)) return false; /* xmin not old enough for all to see */ diff --git a/contrib/pgrowlocks/pgrowlocks.c b/contrib/pgrowlocks/pgrowlocks.c index b11b8750c32..5470d61e250 100644 --- a/contrib/pgrowlocks/pgrowlocks.c +++ b/contrib/pgrowlocks/pgrowlocks.c @@ -130,7 +130,7 @@ pgrowlocks(PG_FUNCTION_ARGS) htsu = HeapTupleSatisfiesUpdate(tuple, GetCurrentCommandId(false), hscan->rs_cbuf); - xmax = HeapTupleHeaderGetRawXmax(tuple->t_data); + xmax = HeapTupleGetRawXmax(tuple); infomask = tuple->t_data->t_infomask; /* diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c index a59ff4e9d4f..1086b51b10c 100644 --- a/contrib/pgstattuple/pgstatapprox.c +++ b/contrib/pgstattuple/pgstatapprox.c @@ -16,6 +16,9 @@ #include "access/htup_details.h" #include "access/relation.h" #include "access/visibilitymap.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/namespace.h" #include "catalog/pg_am_d.h" #include "commands/vacuum.h" #include "funcapi.h" @@ -140,6 +143,7 @@ statapprox_heap(Relation rel, output_type *stat) tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyXidsFromPage(buf, &tuple, page, IsToastRelation(rel)); /* * We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c index 4b9d76ec4e4..bcb612f3d90 100644 --- a/contrib/pgstattuple/pgstatindex.c +++ b/contrib/pgstattuple/pgstatindex.c @@ -627,7 +627,7 @@ pgstathashindex(PG_FUNCTION_ARGS) metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); metap = 
HashPageGetMeta(BufferGetPage(metabuf)); stats.version = metap->hashm_version; - stats.space_per_page = metap->hashm_bsize; + stats.space_per_page = BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(HashPageOpaqueData)); _hash_relbuf(rel, metabuf); /* Get the current relation length */ diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index d1acee5a5fa..e506463dcab 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -5036,16 +5036,24 @@ UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; EXPLAIN (verbose, costs off) -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; -- can be pushed down - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------------------------- - Update on public.ft2 - Output: c1, c2, c3, c4, c5, c6, c7, c8 - -> Foreign Update on public.ft2 - Remote SQL: UPDATE "S 1"."T 1" SET c2 = (c2 + 400), c3 = (c3 || '_update7') WHERE ((("C 1" % 10) = 7)) RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8 -(4 rows) +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; -- can be pushed down + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Sort + Output: t.c1, t.c2, t.c3, t.c4, t.c5, t.c6, t.c7, t.c8 + Sort Key: t.c1 + CTE t + -> Update on public.ft2 + Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8 + -> Foreign Update on public.ft2 + Remote SQL: UPDATE "S 1"."T 1" SET c2 = (c2 + 400), c3 = (c3 || '_update7') WHERE ((("C 1" % 10) = 7)) RETURNING "C 1", c2, c3, c4, c5, c6, c7, c8 + -> CTE Scan on t + Output: t.c1, 
t.c2, t.c3, t.c4, t.c5, t.c6, t.c7, t.c8 +(10 rows) -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 ------+-----+--------------------+------------------------------+--------------------------+----+------------+----- 7 | 407 | 00007_update7 | Thu Jan 08 00:00:00 1970 PST | Thu Jan 08 00:00:00 1970 | 7 | 7 | foo @@ -5190,16 +5198,24 @@ UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; EXPLAIN (verbose, costs off) - DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; -- can be pushed down - QUERY PLAN --------------------------------------------------------------------------------------------- - Delete on public.ft2 - Output: c1, c4 - -> Foreign Delete on public.ft2 - Remote SQL: DELETE FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 5)) RETURNING "C 1", c4 -(4 rows) + WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) + SELECT * FROM t ORDER BY c1; -- can be pushed down + QUERY PLAN +---------------------------------------------------------------------------------------------------- + Sort + Output: t.c1, t.c4 + Sort Key: t.c1 + CTE t + -> Delete on public.ft2 + Output: ft2.c1, ft2.c4 + -> Foreign Delete on public.ft2 + Remote SQL: DELETE FROM "S 1"."T 1" WHERE ((("C 1" % 10) = 5)) RETURNING "C 1", c4 + -> CTE Scan on t + Output: t.c1, t.c4 +(10 rows) -DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; +WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) +SELECT * FROM t ORDER BY c1; c1 | c4 ------+------------------------------ 5 | Tue Jan 06 00:00:00 1970 PST @@ -6547,7 +6563,8 @@ INSERT INTO ft2 (c1,c2,c3,c6) VALUES (1218, 818, 'ggg', '(--;') RETURNING *; 1218 | 818 | ggg_trig_update | | | (--; | ft2 | (1 
row) -UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *) +SELECT * FROM t ORDER BY c1; c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 ------+-----+------------------------+------------------------------+--------------------------+----+------------+----- 8 | 608 | 00008_trig_update | Fri Jan 09 00:00:00 1970 PST | Fri Jan 09 00:00:00 1970 | 8 | 8 | foo diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 7a5439a460b..a4d03e2e2e7 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -4838,8 +4838,8 @@ apply_returning_filter(PgFdwDirectModifyState *dmstate, * Note: no need to care about tableoid here because it will be * initialized in ExecProcessReturning(). */ - HeapTupleHeaderSetXmin(resultTup->t_data, InvalidTransactionId); - HeapTupleHeaderSetXmax(resultTup->t_data, InvalidTransactionId); + HeapTupleSetXmin(resultTup, InvalidTransactionId); + HeapTupleSetXmax(resultTup, InvalidTransactionId); HeapTupleHeaderSetCmin(resultTup->t_data, InvalidTransactionId); } @@ -7711,6 +7711,7 @@ make_tuple_from_result_row(PGresult *res, */ if (ctid) tuple->t_self = tuple->t_data->t_ctid = *ctid; + HeapTupleSetZeroXids(tuple); /* * Stomp on the xmin, xmax, and cmin fields from the tuple created by @@ -7720,8 +7721,8 @@ make_tuple_from_result_row(PGresult *res, * assumption. If we don't do this then, for example, the tuple length * ends up in the xmin field, which isn't what we want. 
*/ - HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); - HeapTupleHeaderSetXmin(tuple->t_data, InvalidTransactionId); + HeapTupleSetXmax(tuple, InvalidTransactionId); + HeapTupleSetXmin(tuple, InvalidTransactionId); HeapTupleHeaderSetCmin(tuple->t_data, InvalidTransactionId); /* Clean up */ diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index ea6287b03fd..ac9756c4a6f 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -1492,8 +1492,10 @@ EXPLAIN (verbose, costs off) UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; -- can be pushed down UPDATE ft2 SET c2 = c2 + 300, c3 = c3 || '_update3' WHERE c1 % 10 = 3; EXPLAIN (verbose, costs off) -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; -- can be pushed down -UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; -- can be pushed down +WITH t AS (UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7' WHERE c1 % 10 = 7 RETURNING *) +SELECT * FROM t ORDER BY c1; BEGIN; EXPLAIN (verbose, costs off) UPDATE ft2 SET c2 = c2 + 400, c3 = c3 || '_update7b' WHERE c1 % 10 = 7 AND c1 < 40 @@ -1507,8 +1509,10 @@ UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT UPDATE ft2 SET c2 = ft2.c2 + 500, c3 = ft2.c3 || '_update9', c7 = DEFAULT FROM ft1 WHERE ft1.c1 = ft2.c2 AND ft1.c1 % 10 = 9; EXPLAIN (verbose, costs off) - DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; -- can be pushed down -DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4; + WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) + SELECT * FROM t ORDER BY c1; -- can be pushed down +WITH t AS (DELETE FROM ft2 WHERE c1 % 10 = 5 RETURNING c1, c4) +SELECT * FROM t ORDER BY c1; BEGIN; EXPLAIN (verbose, costs off) DELETE 
FROM ft2 WHERE c1 % 10 = 6 AND c1 < 40 RETURNING old.c1, c4; -- can't be pushed down @@ -1631,7 +1635,8 @@ CREATE TRIGGER t1_br_insert BEFORE INSERT OR UPDATE INSERT INTO ft2 (c1,c2,c3) VALUES (1208, 818, 'fff') RETURNING *; INSERT INTO ft2 (c1,c2,c3,c6) VALUES (1218, 818, 'ggg', '(--;') RETURNING *; -UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *; +WITH t AS (UPDATE ft2 SET c2 = c2 + 600 WHERE c1 % 10 = 8 AND c1 < 1200 RETURNING *) +SELECT * FROM t ORDER BY c1; -- Test errors thrown on remote side during update ALTER TABLE "S 1"."T 1" ADD CONSTRAINT c2positive CHECK (c2 >= 0); diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index 969d1028cae..c9916f31c0f 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -738,10 +738,10 @@ heap_getsysattr(HeapTuple tup, int attnum, TupleDesc tupleDesc, bool *isnull) result = PointerGetDatum(&(tup->t_self)); break; case MinTransactionIdAttributeNumber: - result = TransactionIdGetDatum(HeapTupleHeaderGetRawXmin(tup->t_data)); + result = TransactionIdGetDatum(HeapTupleGetRawXmin(tup)); break; case MaxTransactionIdAttributeNumber: - result = TransactionIdGetDatum(HeapTupleHeaderGetRawXmax(tup->t_data)); + result = TransactionIdGetDatum(HeapTupleGetRawXmax(tup)); break; case MinCommandIdAttributeNumber: case MaxCommandIdAttributeNumber: @@ -786,6 +786,7 @@ heap_copytuple(HeapTuple tuple) newTuple->t_len = tuple->t_len; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyXids(newTuple, tuple); newTuple->t_data = (HeapTupleHeader) ((char *) newTuple + HEAPTUPLESIZE); memcpy(newTuple->t_data, tuple->t_data, tuple->t_len); return newTuple; @@ -812,6 +813,7 @@ heap_copytuple_with_tuple(HeapTuple src, HeapTuple dest) dest->t_len = src->t_len; dest->t_self = src->t_self; dest->t_tableOid = src->t_tableOid; + HeapTupleCopyXids(dest, src); dest->t_data = (HeapTupleHeader) palloc(src->t_len); 
memcpy(dest->t_data, src->t_data, src->t_len); } @@ -1174,6 +1176,7 @@ heap_form_tuple(TupleDesc tupleDescriptor, tuple->t_len = len; ItemPointerSetInvalid(&(tuple->t_self)); tuple->t_tableOid = InvalidOid; + HeapTupleSetZeroXids(tuple); HeapTupleHeaderSetDatumLength(td, len); HeapTupleHeaderSetTypeId(td, tupleDescriptor->tdtypeid); @@ -1258,6 +1261,7 @@ heap_modify_tuple(HeapTuple tuple, newTuple->t_data->t_ctid = tuple->t_data->t_ctid; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyXids(newTuple, tuple); return newTuple; } @@ -1321,6 +1325,7 @@ heap_modify_tuple_by_cols(HeapTuple tuple, newTuple->t_data->t_ctid = tuple->t_data->t_ctid; newTuple->t_self = tuple->t_self; newTuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyXids(newTuple, tuple); return newTuple; } @@ -1571,6 +1576,7 @@ heap_tuple_from_minimal_tuple(MinimalTuple mtup) result->t_len = len; ItemPointerSetInvalid(&(result->t_self)); result->t_tableOid = InvalidOid; + HeapTupleSetZeroXids(result); result->t_data = (HeapTupleHeader) ((char *) result + HEAPTUPLESIZE); memcpy((char *) result->t_data + MINIMAL_TUPLE_OFFSET, mtup, mtup->t_len); memset(result->t_data, 0, offsetof(HeapTupleHeaderData, t_infomask2)); diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index ed2195f14b2..f1bf631bfa4 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -90,6 +90,9 @@ populate_compact_attribute_internal(Form_pg_attribute src, case TYPALIGN_SHORT: dst->attalignby = ALIGNOF_SHORT; break; + case TYPALIGN_XID: + dst->attalignby = ALIGNOF_INT64_T; + break; default: dst->attalignby = 0; elog(ERROR, "invalid attalign value: %c", src->attalign); diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 1f5ca3e0f72..6ac653833fa 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -42,9 +42,13 @@ #include "access/xloginsert.h" #include 
"catalog/pg_database.h" #include "catalog/pg_database_d.h" +#include "access/xlogutils.h" +#include "catalog/index.h" +#include "catalog/namespace.h" #include "commands/vacuum.h" #include "pgstat.h" #include "port/pg_bitutils.h" +#include "storage/buf_internals.h" #include "storage/lmgr.h" #include "storage/predicate.h" #include "storage/procarray.h" @@ -54,9 +58,8 @@ #include "utils/spccache.h" #include "utils/syscache.h" - static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, - TransactionId xid, CommandId cid, int options); + CommandId cid, int options); static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tuple, @@ -105,6 +108,8 @@ static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate); static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup); static HeapTuple ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required, bool *copy); +static bool heap_page_prepare_for_xid(Relation relation, Buffer buffer, + TransactionId xid, bool multi); /* @@ -523,6 +528,8 @@ page_collect_tuples(HeapScanDesc scan, Snapshot snapshot, loctup.t_data = (HeapTupleHeader) PageGetItem(page, lpp); loctup.t_len = ItemIdGetLength(lpp); loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd); + HeapTupleCopyRawXidsFromPage(buffer, &loctup, page, + IsToastRelation(scan->rs_base.rs_rd)); ItemPointerSet(&(loctup.t_self), block, lineoff); if (all_visible) @@ -537,6 +544,12 @@ page_collect_tuples(HeapScanDesc scan, Snapshot snapshot, if (valid) { scan->rs_vistuples[ntup] = lineoff; + /* + * Since there is no lock futher and xmin or xmax may be + * changed while base shift, copy them here. 
+ */ + scan->rs_xmin[ntup] = loctup.t_xmin; + scan->rs_xmax[ntup] = loctup.t_xmax; ntup++; } } @@ -950,6 +963,8 @@ continue_page: tuple->t_data = (HeapTupleHeader) PageGetItem(page, lpp); tuple->t_len = ItemIdGetLength(lpp); + HeapTupleCopyXidsFromPage(scan->rs_cbuf, tuple, page, + IsToastRelation(scan->rs_base.rs_rd)); ItemPointerSet(&(tuple->t_self), scan->rs_cblock, lineoff); visible = HeapTupleSatisfiesVisibility(tuple, @@ -1029,6 +1044,9 @@ heapgettup_pagemode(HeapScanDesc scan, linesleft = scan->rs_cindex; /* lineindex now references the next or previous visible tid */ + tuple->t_xmin = scan->rs_xmin[scan->rs_cindex]; + tuple->t_xmax = scan->rs_xmax[scan->rs_cindex]; + goto continue_page; } @@ -1067,6 +1085,8 @@ continue_page: tuple->t_data = (HeapTupleHeader) PageGetItem(page, lpp); tuple->t_len = ItemIdGetLength(lpp); + tuple->t_xmin = scan->rs_xmin[lineindex]; + tuple->t_xmax = scan->rs_xmax[lineindex]; ItemPointerSet(&(tuple->t_self), scan->rs_cblock, lineoff); /* skip any tuples that don't match the scan key */ @@ -1664,6 +1684,7 @@ heap_fetch(Relation relation, tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyXidsFromPage(buffer, tuple, page, IsToastRelation(relation)); /* * check tuple visibility, then release lock @@ -1672,7 +1693,7 @@ heap_fetch(Relation relation, if (valid) PredicateLockTID(relation, &(tuple->t_self), snapshot, - HeapTupleHeaderGetXmin(tuple->t_data)); + HeapTupleGetXmin(tuple)); HeapCheckForSerializableConflictOut(valid, relation, tuple, buffer, snapshot); @@ -1749,6 +1770,8 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, Assert(TransactionIdIsValid(RecentXmin)); Assert(BufferGetBlockNumber(buffer) == blkno); + heapTuple->t_self = *tid; + /* Scan through possible multiple members of HOT-chain */ for (;;) { @@ -1784,6 +1807,8 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer 
buffer, heapTuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); heapTuple->t_len = ItemIdGetLength(lp); heapTuple->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyXidsFromPage(buffer, heapTuple, page, + IsToastRelation(relation)); ItemPointerSet(&heapTuple->t_self, blkno, offnum); /* @@ -1798,7 +1823,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, */ if (TransactionIdIsValid(prev_xmax) && !TransactionIdEquals(prev_xmax, - HeapTupleHeaderGetXmin(heapTuple->t_data))) + HeapTupleGetXmin(heapTuple))) break; /* @@ -1819,7 +1844,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, { ItemPointerSetOffsetNumber(tid, offnum); PredicateLockTID(relation, &heapTuple->t_self, snapshot, - HeapTupleHeaderGetXmin(heapTuple->t_data)); + HeapTupleGetXmin(heapTuple)); if (all_dead) *all_dead = false; return true; @@ -1854,7 +1879,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, blkno); offnum = ItemPointerGetOffsetNumber(&heapTuple->t_data->t_ctid); at_chain_start = false; - prev_xmax = HeapTupleHeaderGetUpdateXid(heapTuple->t_data); + prev_xmax = HeapTupleGetUpdateXidAny(heapTuple); } else break; /* end of chain */ @@ -1940,13 +1965,14 @@ heap_get_latest_tid(TableScanDesc sscan, tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_tableOid = RelationGetRelid(relation); + HeapTupleCopyXidsFromPage(buffer, &tp, page, IsToastRelation(relation)); /* * After following a t_ctid link, we might arrive at an unrelated * tuple. Check for XMIN match. */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data))) + !TransactionIdEquals(priorXmax, HeapTupleGetXmin(&tp))) { UnlockReleaseBuffer(buffer); break; @@ -1965,7 +1991,7 @@ heap_get_latest_tid(TableScanDesc sscan, * If there's a valid t_ctid link, follow it, else we're done. 
*/ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) || - HeapTupleHeaderIsOnlyLocked(tp.t_data) || + HeapTupleIsOnlyLocked(&tp) || HeapTupleHeaderIndicatesMovedPartitions(tp.t_data) || ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid)) { @@ -1974,7 +2000,7 @@ heap_get_latest_tid(TableScanDesc sscan, } ctid = tp.t_data->t_ctid; - priorXmax = HeapTupleHeaderGetUpdateXid(tp.t_data); + priorXmax = HeapTupleGetUpdateXidAny(&tp); UnlockReleaseBuffer(buffer); } /* end of loop */ } @@ -1999,7 +2025,7 @@ heap_get_latest_tid(TableScanDesc sscan, static void UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid) { - Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple), xid)); + Assert(TransactionIdEquals(HeapTupleHeaderGetRawXmax(BufferGetPage(buffer), tuple), xid)); Assert(!(tuple->t_infomask & HEAP_XMAX_IS_MULTI)); if (!(tuple->t_infomask & (HEAP_XMAX_COMMITTED | HEAP_XMAX_INVALID))) @@ -2066,6 +2092,31 @@ ReleaseBulkInsertStatePin(BulkInsertState bistate) bistate->last_free = InvalidBlockNumber; } +/* + * Add xid_base and multi base to the WAL record. + * + * WAL record must being constructed. + */ +static inline void +xlog_register_base(Page page, bool is_toast, TransactionId *xid_base, + TransactionId *multi_base) +{ + if (is_toast) + { + *xid_base = ToastPageGetSpecial(page)->pd_xid_base; + *multi_base = InvalidTransactionId; + } + else + { + HeapPageSpecial special = HeapPageGetSpecial(page); + + *xid_base = special->pd_xid_base; + *multi_base = special->pd_multi_base; + } + + XLogRegisterData((char *) xid_base, sizeof(*xid_base)); + XLogRegisterData((char *) multi_base, sizeof(*multi_base)); +} /* * heap_insert - insert tuple into a heap @@ -2105,7 +2156,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, * Note: below this point, heaptup is the data we actually intend to store * into the relation; tup is the caller's original untoasted data. 
*/ - heaptup = heap_prepare_insert(relation, tup, xid, cid, options); + heaptup = heap_prepare_insert(relation, tup, cid, options); /* * Find buffer to insert this tuple into. If the page is all visible, @@ -2133,6 +2184,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, */ CheckForSerializableConflictIn(relation, NULL, InvalidBlockNumber); + heap_page_prepare_for_xid(relation, buffer, xid, false); + HeapTupleSetXmin(heaptup, xid); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -2170,6 +2224,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, Page page = BufferGetPage(buffer); uint8 info = XLOG_HEAP_INSERT; int bufflags = 0; + TransactionId xid_base, + multi_base; /* * If this is a catalog, we need to transmit combo CIDs to properly @@ -2208,12 +2264,17 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, { xlrec.flags |= XLH_INSERT_CONTAINS_NEW_TUPLE; bufflags |= REGBUF_KEEP_DATA; - - if (IsToastRelation(relation)) - xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION; } + if (IsToastRelation(relation)) + xlrec.flags |= XLH_INSERT_ON_TOAST_RELATION; + XLogBeginInsert(); + + if (info & XLOG_HEAP_INIT_PAGE) + xlog_register_base(page, IsToastRelation(relation), &xid_base, + &multi_base); + XLogRegisterData(&xlrec, SizeOfHeapInsert); xlhdr.t_infomask2 = heaptup->t_data->t_infomask2; @@ -2275,7 +2336,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, * that in any case, the header fields are also set in the original tuple. 
*/ static HeapTuple -heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, +heap_prepare_insert(Relation relation, HeapTuple tup, CommandId cid, int options) { /* @@ -2292,12 +2353,12 @@ heap_prepare_insert(Relation relation, HeapTuple tup, TransactionId xid, tup->t_data->t_infomask &= ~(HEAP_XACT_MASK); tup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK); tup->t_data->t_infomask |= HEAP_XMAX_INVALID; - HeapTupleHeaderSetXmin(tup->t_data, xid); + HeapTupleSetXmin(tup, InvalidTransactionId); if (options & HEAP_INSERT_FROZEN) - HeapTupleHeaderSetXminFrozen(tup->t_data); + HeapTupleHeaderStoreXminFrozen(tup->t_data); HeapTupleHeaderSetCmin(tup->t_data, cid); - HeapTupleHeaderSetXmax(tup->t_data, 0); /* for cleanliness */ + HeapTupleSetXmax(tup, 0); /* for cleanliness */ tup->t_tableOid = RelationGetRelid(relation); /* @@ -2389,8 +2450,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, tuple = ExecFetchSlotHeapTuple(slots[i], true, NULL); slots[i]->tts_tableOid = RelationGetRelid(relation); tuple->t_tableOid = slots[i]->tts_tableOid; - heaptuples[i] = heap_prepare_insert(relation, tuple, xid, cid, - options); + heaptuples[i] = heap_prepare_insert(relation, tuple, cid, options); } /* @@ -2465,6 +2525,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN)) all_frozen_set = true; + heap_page_prepare_for_xid(relation, buffer, xid, false); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -2472,6 +2534,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, * RelationGetBufferForTuple has ensured that the first tuple fits. * Put that on the page, and then as many other tuples as fit. 
*/ + HeapTupleSetXmin(heaptuples[ndone], xid); RelationPutHeapTuple(relation, buffer, heaptuples[ndone], false); /* @@ -2488,6 +2551,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, if (PageGetHeapFreeSpace(page) < MAXALIGN(heaptup->t_len) + saveFreeSpace) break; + HeapTupleSetXmin(heaptup, xid); RelationPutHeapTuple(relation, buffer, heaptup, false); /* @@ -2533,6 +2597,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, char *scratchptr = scratch.data; bool init; int bufflags = 0; + TransactionId xid_base, + multi_base; /* * If the page was previously empty, we can reinit the page @@ -2623,6 +2689,11 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, bufflags |= REGBUF_KEEP_DATA; XLogBeginInsert(); + + if (info & XLOG_HEAP_INIT_PAGE) + xlog_register_base(page, IsToastRelation(relation), &xid_base, + &multi_base); + XLogRegisterData(xlrec, tupledata - scratch.data); XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags); @@ -2830,6 +2901,7 @@ heap_delete(Relation relation, ItemPointer tid, tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_self = *tid; + HeapTupleCopyXidsFromPage(buffer, &tp, page, IsToastRelation(relation)); l1: @@ -2861,7 +2933,7 @@ l1: uint16 infomask; /* must copy state data before unlocking buffer */ - xwait = HeapTupleHeaderGetRawXmax(tp.t_data); + xwait = HeapTupleGetRawXmax(&tp); infomask = tp.t_data->t_infomask; /* @@ -2900,6 +2972,10 @@ l1: NULL); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + /* Copy possibly updated xid base after relocking */ + HeapTupleCopyXidsFromPage(buffer, &tp, page, + IsToastRelation(relation)); + /* * If xwait had just locked the tuple then some other xact * could update this tuple before we get to this point. 
Check @@ -2910,7 +2986,7 @@ l1: */ if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) || xmax_infomask_changed(tp.t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(&tp), xwait)) goto l1; } @@ -2937,6 +3013,10 @@ l1: XactLockTableWait(xwait, relation, &(tp.t_self), XLTW_Delete); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + /* Copy possibly updated xid base after relocking */ + HeapTupleCopyXidsFromPage(buffer, &tp, page, + IsToastRelation(relation)); + /* * xwait is done, but if xwait had just locked the tuple then some * other xact could update this tuple before we get to this point. @@ -2947,7 +3027,7 @@ l1: */ if ((vmbuffer == InvalidBuffer && PageIsAllVisible(page)) || xmax_infomask_changed(tp.t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tp.t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(&tp), xwait)) goto l1; @@ -2961,7 +3041,7 @@ l1: */ if ((tp.t_data->t_infomask & HEAP_XMAX_INVALID) || HEAP_XMAX_IS_LOCKED_ONLY(tp.t_data->t_infomask) || - HeapTupleHeaderIsOnlyLocked(tp.t_data)) + HeapTupleIsOnlyLocked(&tp)) result = TM_Ok; else if (!ItemPointerEquals(&tp.t_self, &tp.t_data->t_ctid)) result = TM_Updated; @@ -2991,9 +3071,9 @@ l1: if (result != TM_Ok) { tmfd->ctid = tp.t_data->t_ctid; - tmfd->xmax = HeapTupleHeaderGetUpdateXid(tp.t_data); + tmfd->xmax = HeapTupleGetUpdateXidAny(&tp); if (result == TM_SelfModified) - tmfd->cmax = HeapTupleHeaderGetCmax(tp.t_data); + tmfd->cmax = HeapTupleGetCmax(&tp); else tmfd->cmax = InvalidCommandId; UnlockReleaseBuffer(buffer); @@ -3016,7 +3096,7 @@ l1: CheckForSerializableConflictIn(relation, tid, BufferGetBlockNumber(buffer)); /* replace cid with a combo CID if necessary */ - HeapTupleHeaderAdjustCmax(tp.t_data, &cid, &iscombo); + HeapTupleAdjustCmax(&tp, &cid, &iscombo); /* * Compute replica identity tuple before entering the critical section so @@ -3034,11 +3114,20 @@ l1: */ 
MultiXactIdSetOldestMember(); - compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(tp.t_data), + compute_new_xmax_infomask(HeapTupleGetRawXmax(&tp), tp.t_data->t_infomask, tp.t_data->t_infomask2, xid, LockTupleExclusive, true, &new_xmax, &new_infomask, &new_infomask2); +#ifdef USE_ASSERT_CHECKING + if (IsToastRelation(relation)) + Assert((new_infomask & HEAP_XMAX_IS_MULTI) == 0); +#endif + + heap_page_prepare_for_xid(relation, buffer, new_xmax, + (new_infomask & HEAP_XMAX_IS_MULTI) != 0); + HeapTupleCopyXidsFromPage(buffer, &tp, page, IsToastRelation(relation)); + START_CRIT_SECTION(); /* @@ -3048,7 +3137,7 @@ l1: * the subsequent page pruning will be a no-op and the hint will be * cleared. */ - PageSetPrunable(page, xid); + PageSetPrunable(page, xid, IsToastRelation(relation)); if (PageIsAllVisible(page)) { @@ -3064,7 +3153,7 @@ l1: tp.t_data->t_infomask |= new_infomask; tp.t_data->t_infomask2 |= new_infomask2; HeapTupleHeaderClearHotUpdated(tp.t_data); - HeapTupleHeaderSetXmax(tp.t_data, new_xmax); + HeapTupleAndHeaderSetXmax(page, &tp, new_xmax, IsToastRelation(relation)); HeapTupleHeaderSetCmax(tp.t_data, cid, iscombo); /* Make sure there is no forward chain link in t_ctid */ tp.t_data->t_ctid = tp.t_self; @@ -3103,6 +3192,8 @@ l1: tp.t_data->t_infomask2); xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self); xlrec.xmax = new_xmax; + if (IsToastRelation(relation)) + xlrec.flags |= XLH_DELETE_PAGE_ON_TOAST_RELATION; if (old_key_tuple != NULL) { @@ -3260,7 +3351,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, HeapTuple heaptup; HeapTuple old_key_tuple = NULL; bool old_key_copied = false; - Page page; + Page page, + newpage; BlockNumber block; MultiXactStatus mxact_status; Buffer buffer, @@ -3287,6 +3379,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, infomask_new_tuple, infomask2_new_tuple; + Assert(!IsToastRelation(relation)); Assert(ItemPointerIsValid(otid)); /* Cheap, simplistic check that the tuple matches the 
rel's rowtype. */ @@ -3407,6 +3500,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp); oldtup.t_len = ItemIdGetLength(lp); oldtup.t_self = *otid; + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); /* the new tuple is ready, except for this: */ newtup->t_tableOid = RelationGetRelid(relation); @@ -3500,7 +3594,7 @@ l2: */ /* must copy state data before unlocking buffer */ - xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data); + xwait = HeapTupleGetRawXmax(&oldtup); infomask = oldtup.t_data->t_infomask; /* @@ -3551,6 +3645,7 @@ l2: checked_lockers = true; locker_remains = remain != 0; LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); /* * If xwait had just locked the tuple then some other xact @@ -3559,7 +3654,7 @@ l2: */ if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(oldtup.t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(&oldtup), xwait)) goto l2; } @@ -3585,7 +3680,7 @@ l2: * subxact aborts. */ if (!HEAP_XMAX_IS_LOCKED_ONLY(oldtup.t_data->t_infomask)) - update_xact = HeapTupleGetUpdateXid(oldtup.t_data); + update_xact = HeapTupleGetUpdateXid(&oldtup); else update_xact = InvalidTransactionId; @@ -3632,7 +3727,7 @@ l2: XLTW_Update); checked_lockers = true; LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); - + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); /* * xwait is done, but if xwait had just locked the tuple then some * other xact could update this tuple before we get to this point. 
@@ -3640,7 +3735,7 @@ l2: */ if (xmax_infomask_changed(oldtup.t_data->t_infomask, infomask) || !TransactionIdEquals(xwait, - HeapTupleHeaderGetRawXmax(oldtup.t_data))) + HeapTupleGetRawXmax(&oldtup))) goto l2; /* Otherwise check if it committed or aborted */ @@ -3679,9 +3774,9 @@ l2: if (result != TM_Ok) { tmfd->ctid = oldtup.t_data->t_ctid; - tmfd->xmax = HeapTupleHeaderGetUpdateXid(oldtup.t_data); + tmfd->xmax = HeapTupleGetUpdateXidAny(&oldtup); if (result == TM_SelfModified) - tmfd->cmax = HeapTupleHeaderGetCmax(oldtup.t_data); + tmfd->cmax = HeapTupleGetCmax(&oldtup); else tmfd->cmax = InvalidCommandId; UnlockReleaseBuffer(buffer); @@ -3714,6 +3809,7 @@ l2: LockBuffer(buffer, BUFFER_LOCK_UNLOCK); visibilitymap_pin(relation, block, &vmbuffer); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); goto l2; } @@ -3723,7 +3819,7 @@ l2: * If the tuple we're updating is locked, we need to preserve the locking * info in the old tuple's Xmax. Prepare a new Xmax value for this. */ - compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data), + compute_new_xmax_infomask(HeapTupleGetRawXmax(&oldtup), oldtup.t_data->t_infomask, oldtup.t_data->t_infomask2, xid, *lockmode, true, @@ -3742,7 +3838,7 @@ l2: (checked_lockers && !locker_remains)) xmax_new_tuple = InvalidTransactionId; else - xmax_new_tuple = HeapTupleHeaderGetRawXmax(oldtup.t_data); + xmax_new_tuple = HeapTupleGetRawXmax(&oldtup); if (!TransactionIdIsValid(xmax_new_tuple)) { @@ -3775,17 +3871,15 @@ l2: */ newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK); newtup->t_data->t_infomask2 &= ~(HEAP2_XACT_MASK); - HeapTupleHeaderSetXmin(newtup->t_data, xid); HeapTupleHeaderSetCmin(newtup->t_data, cid); newtup->t_data->t_infomask |= HEAP_UPDATED | infomask_new_tuple; newtup->t_data->t_infomask2 |= infomask2_new_tuple; - HeapTupleHeaderSetXmax(newtup->t_data, xmax_new_tuple); /* * Replace cid with a combo CID if necessary. 
Note that we already put * the plain cid into the new tuple. */ - HeapTupleHeaderAdjustCmax(oldtup.t_data, &cid, &iscombo); + HeapTupleAdjustCmax(&oldtup, &cid, &iscombo); /* * If the toaster needs to be activated, OR if the new tuple will not fit @@ -3815,7 +3909,7 @@ l2: newtupsize = MAXALIGN(newtup->t_len); - if (need_toast || newtupsize > pagefree) + if (need_toast || newtupsize > pagefree || HeapPageIsDoubleXmax(page)) { TransactionId xmax_lock_old_tuple; uint16 infomask_lock_old_tuple, @@ -3840,7 +3934,7 @@ l2: * updating, because the potentially created multixact would otherwise * be wrong. */ - compute_new_xmax_infomask(HeapTupleHeaderGetRawXmax(oldtup.t_data), + compute_new_xmax_infomask(HeapTupleGetRawXmax(&oldtup), oldtup.t_data->t_infomask, oldtup.t_data->t_infomask2, xid, *lockmode, false, @@ -3849,6 +3943,10 @@ l2: Assert(HEAP_XMAX_IS_LOCKED_ONLY(infomask_lock_old_tuple)); + heap_page_prepare_for_xid(relation, buffer, xmax_lock_old_tuple, + (infomask_lock_old_tuple & HEAP_XMAX_IS_MULTI) != 0); + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); + START_CRIT_SECTION(); /* Clear obsolete visibility flags ... */ @@ -3857,9 +3955,9 @@ l2: HeapTupleClearHotUpdated(&oldtup); /* ... and store info about transaction updating this tuple */ Assert(TransactionIdIsValid(xmax_lock_old_tuple)); - HeapTupleHeaderSetXmax(oldtup.t_data, xmax_lock_old_tuple); oldtup.t_data->t_infomask |= infomask_lock_old_tuple; oldtup.t_data->t_infomask2 |= infomask2_lock_old_tuple; + HeapTupleAndHeaderSetXmax(page, &oldtup, xmax_lock_old_tuple, false); HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); /* temporarily make it look not-updated, but locked */ @@ -3942,7 +4040,11 @@ l2: */ for (;;) { - if (newtupsize > pagefree) + /* + * We can't fit new tuple to "double xmax" page, since it's + * impossible to set xmin there. + */ + if (newtupsize > pagefree || HeapPageIsDoubleXmax(page)) { /* It doesn't fit, must use RelationGetBufferForTuple. 
*/ newbuf = RelationGetBufferForTuple(relation, heaptup->t_len, @@ -3976,6 +4078,9 @@ l2: break; } } + + /* Copy possibly updated xid base to old tuple after relocking */ + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); } else { @@ -4047,6 +4152,33 @@ l2: id_has_external, &old_key_copied); + newpage = BufferGetPage(newbuf); + + /* + * Prepare pages for the current xid, that witten to the new tuple's Xmax + * and old page's pd_prune_xid. + */ + heap_page_prepare_for_xid(relation, buffer, xid, false); + if (newbuf != buffer) + heap_page_prepare_for_xid(relation, newbuf, xid, false); + + /* Prepare pages for tuple's Xmax */ + heap_page_prepare_for_xid(relation, buffer, xmax_old_tuple, + (infomask_old_tuple & HEAP_XMAX_IS_MULTI) != 0); + heap_page_prepare_for_xid(relation, newbuf, xmax_new_tuple, + (heaptup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) != 0); + + /* Copy possibly updated Xid bases to the both tuples. */ + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); + + /* + * Set new tuple's Xmin/Xmax, old tuple's Xmin/Xmax were already shifted. + */ + HeapTupleAndHeaderSetXmin(newpage, heaptup, xid, + IsToastRelation(relation)); + HeapTupleAndHeaderSetXmax(newpage, heaptup, xmax_new_tuple, + IsToastRelation(relation)); + /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -4062,7 +4194,7 @@ l2: * not to optimize for aborts. Note that heap_xlog_update must be kept in * sync if this decision changes. */ - PageSetPrunable(page, xid); + PageSetPrunable(page, xid, false); if (use_hot_update) { @@ -4089,10 +4221,11 @@ l2: oldtup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; /* ... 
and store info about transaction updating this tuple */ Assert(TransactionIdIsValid(xmax_old_tuple)); - HeapTupleHeaderSetXmax(oldtup.t_data, xmax_old_tuple); oldtup.t_data->t_infomask |= infomask_old_tuple; oldtup.t_data->t_infomask2 |= infomask2_old_tuple; + HeapTupleAndHeaderSetXmax(page, &oldtup, xmax_old_tuple, false); HeapTupleHeaderSetCmax(oldtup.t_data, cid, iscombo); + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, false); /* record address of new tuple in t_ctid of old one */ oldtup.t_data->t_ctid = heaptup->t_self; @@ -4146,6 +4279,18 @@ l2: END_CRIT_SECTION(); + if (newtup != heaptup) + { + /* + * Set new tuple's Xmin/Xmax only after both xid base preparations. + * Old tuple's Xmin/Xmax were already shifted because old tuple is on + * the page. + */ + Assert(!IsToastRelation(relation)); + HeapTupleAndHeaderSetXmin(newpage, heaptup, xid, false); + HeapTupleAndHeaderSetXmax(newpage, newtup, xmax_new_tuple, false); + } + if (newbuf != buffer) LockBuffer(newbuf, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK); @@ -4615,6 +4760,7 @@ heap_lock_tuple(Relation relation, HeapTuple tuple, tuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); tuple->t_len = ItemIdGetLength(lp); tuple->t_tableOid = RelationGetRelid(relation); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); l3: result = HeapTupleSatisfiesUpdate(tuple, cid, *buffer); @@ -4641,7 +4787,7 @@ l3: ItemPointerData t_ctid; /* must copy state data before unlocking buffer */ - xwait = HeapTupleHeaderGetRawXmax(tuple->t_data); + xwait = HeapTupleGetRawXmax(tuple); infomask = tuple->t_data->t_infomask; infomask2 = tuple->t_data->t_infomask2; ItemPointerCopy(&tuple->t_data->t_ctid, &t_ctid); @@ -4799,11 +4945,13 @@ l3: result = res; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); goto failed; } } LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, 
tuple, page, false); /* * Make sure it's still an appropriate lock, else start over. @@ -4812,7 +4960,7 @@ l3: * now need to follow the update chain to lock the new * versions. */ - if (!HeapTupleHeaderIsOnlyLocked(tuple->t_data) && + if (!HeapTupleIsOnlyLocked(tuple) && ((tuple->t_data->t_infomask2 & HEAP_KEYS_UPDATED) || !updated)) goto l3; @@ -4839,6 +4987,7 @@ l3: !HEAP_XMAX_IS_EXCL_LOCKED(infomask)) { LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); /* * Make sure it's still an appropriate lock, else start over. @@ -4867,8 +5016,10 @@ l3: * meantime, start over. */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); + if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; @@ -4879,10 +5030,11 @@ l3: else if (HEAP_XMAX_IS_KEYSHR_LOCKED(infomask)) { LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); /* if the xmax changed in the meantime, start over */ if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; /* otherwise, we're good */ @@ -4907,8 +5059,10 @@ l3: { /* ... 
but if the xmax changed in the meantime, start over */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); + if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; Assert(HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask)); @@ -4929,6 +5083,7 @@ l3: if (require_sleep && (result == TM_Updated || result == TM_Deleted)) { LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); goto failed; } else if (require_sleep) @@ -4954,6 +5109,7 @@ l3: result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); goto failed; } @@ -4980,6 +5136,8 @@ l3: result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, + false); goto failed; } break; @@ -5020,6 +5178,8 @@ l3: result = TM_WouldBlock; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, + false); goto failed; } break; @@ -5046,11 +5206,13 @@ l3: result = res; /* recovery code expects to have buffer lock held */ LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); goto failed; } } LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); /* * xwait is done, but if xwait had just locked the tuple then some @@ -5058,7 +5220,7 @@ l3: * Check for xmax change, and start over if so. 
*/ if (xmax_infomask_changed(tuple->t_data->t_infomask, infomask) || - !TransactionIdEquals(HeapTupleHeaderGetRawXmax(tuple->t_data), + !TransactionIdEquals(HeapTupleGetRawXmax(tuple), xwait)) goto l3; @@ -5086,7 +5248,7 @@ l3: if (!require_sleep || (tuple->t_data->t_infomask & HEAP_XMAX_INVALID) || HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_data->t_infomask) || - HeapTupleHeaderIsOnlyLocked(tuple->t_data)) + HeapTupleIsOnlyLocked(tuple)) result = TM_Ok; else if (!ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid)) result = TM_Updated; @@ -5112,9 +5274,9 @@ failed: Assert(result != TM_Updated || !ItemPointerEquals(&tuple->t_self, &tuple->t_data->t_ctid)); tmfd->ctid = tuple->t_data->t_ctid; - tmfd->xmax = HeapTupleHeaderGetUpdateXid(tuple->t_data); + tmfd->xmax = HeapTupleGetUpdateXidAny(tuple); if (result == TM_SelfModified) - tmfd->cmax = HeapTupleHeaderGetCmax(tuple->t_data); + tmfd->cmax = HeapTupleGetCmax(tuple); else tmfd->cmax = InvalidCommandId; goto out_locked; @@ -5134,10 +5296,11 @@ failed: LockBuffer(*buffer, BUFFER_LOCK_UNLOCK); visibilitymap_pin(relation, block, &vmbuffer); LockBuffer(*buffer, BUFFER_LOCK_EXCLUSIVE); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); goto l3; } - xmax = HeapTupleHeaderGetRawXmax(tuple->t_data); + xmax = HeapTupleGetRawXmax(tuple); old_infomask = tuple->t_data->t_infomask; /* @@ -5159,6 +5322,10 @@ failed: GetCurrentTransactionId(), mode, false, &xid, &new_infomask, &new_infomask2); + heap_page_prepare_for_xid(relation, *buffer, xid, + (new_infomask & HEAP_XMAX_IS_MULTI) != 0); + HeapTupleCopyXidsFromPage(*buffer, tuple, page, false); + START_CRIT_SECTION(); /* @@ -5177,7 +5344,8 @@ failed: tuple->t_data->t_infomask2 |= new_infomask2; if (HEAP_XMAX_IS_LOCKED_ONLY(new_infomask)) HeapTupleHeaderClearHotUpdated(tuple->t_data); - HeapTupleHeaderSetXmax(tuple->t_data, xid); + Assert(!IsToastRelation(relation)); + HeapTupleAndHeaderSetXmax(page, tuple, xid, false); /* * Make sure there is no forward chain link in 
t_ctid. Note that in the @@ -5771,12 +5939,19 @@ l4: LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); } + /* + * Copy xid base after buffer relocking, it could have changed since + * heap_fetch(). + */ + HeapTupleCopyXidsFromPage(buf, &mytup, BufferGetPage(buf), + IsToastRelation(rel)); + /* * Check the tuple XMIN against prior XMAX, if any. If we reached the * end of the chain, we're done, so return success. */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(HeapTupleHeaderGetXmin(mytup.t_data), + !TransactionIdEquals(HeapTupleGetXmin(&mytup), priorXmax)) { result = TM_Ok; @@ -5788,7 +5963,7 @@ l4: * (sub)transaction, then we already locked the last live one in the * chain, thus we're done, so return success. */ - if (TransactionIdDidAbort(HeapTupleHeaderGetXmin(mytup.t_data))) + if (TransactionIdDidAbort(HeapTupleGetXmin(&mytup))) { result = TM_Ok; goto out_locked; @@ -5796,7 +5971,7 @@ l4: old_infomask = mytup.t_data->t_infomask; old_infomask2 = mytup.t_data->t_infomask2; - xmax = HeapTupleHeaderGetRawXmax(mytup.t_data); + xmax = HeapTupleGetRawXmax(&mytup); /* * If this tuple version has been updated or locked by some concurrent @@ -5809,7 +5984,7 @@ l4: TransactionId rawxmax; bool needwait; - rawxmax = HeapTupleHeaderGetRawXmax(mytup.t_data); + rawxmax = HeapTupleGetRawXmax(&mytup); if (old_infomask & HEAP_XMAX_IS_MULTI) { int nmembers; @@ -5950,14 +6125,25 @@ l4: VISIBILITYMAP_ALL_FROZEN)) cleared_all_frozen = true; +#ifdef USE_ASSERT_CHECKING + if (IsToastRelation(rel)) + Assert((new_infomask & HEAP_XMAX_IS_MULTI) == 0); +#endif + + heap_page_prepare_for_xid(rel, buf, new_xmax, + (new_infomask & HEAP_XMAX_IS_MULTI) != 0); + HeapTupleCopyXidsFromPage(buf, &mytup, BufferGetPage(buf), + IsToastRelation(rel)); + START_CRIT_SECTION(); /* ... 
and set them */ - HeapTupleHeaderSetXmax(mytup.t_data, new_xmax); mytup.t_data->t_infomask &= ~HEAP_XMAX_BITS; mytup.t_data->t_infomask2 &= ~HEAP_KEYS_UPDATED; mytup.t_data->t_infomask |= new_infomask; mytup.t_data->t_infomask2 |= new_infomask2; + Assert(!IsToastRelation(rel)); + HeapTupleAndHeaderSetXmax(BufferGetPage(buf), &mytup, new_xmax, false); MarkBufferDirty(buf); @@ -5991,14 +6177,14 @@ next: if (mytup.t_data->t_infomask & HEAP_XMAX_INVALID || HeapTupleHeaderIndicatesMovedPartitions(mytup.t_data) || ItemPointerEquals(&mytup.t_self, &mytup.t_data->t_ctid) || - HeapTupleHeaderIsOnlyLocked(mytup.t_data)) + HeapTupleIsOnlyLocked(&mytup)) { result = TM_Ok; goto out_locked; } /* tail recursion */ - priorXmax = HeapTupleHeaderGetUpdateXid(mytup.t_data); + priorXmax = HeapTupleGetUpdateXidAny(&mytup); ItemPointerCopy(&(mytup.t_data->t_ctid), &tupid); UnlockReleaseBuffer(buf); } @@ -6200,12 +6386,13 @@ heap_abort_speculative(Relation relation, ItemPointer tid) tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); tp.t_len = ItemIdGetLength(lp); tp.t_self = *tid; + HeapTupleCopyXidsFromPage(buffer, &tp, page, IsToastRelation(relation)); /* * Sanity check that the tuple really is a speculatively inserted tuple, * inserted by us. 
*/ - if (tp.t_data->t_choice.t_heap.t_xmin != xid) + if (HeapTupleGetRawXmin(&tp) != xid) elog(ERROR, "attempted to kill a tuple inserted by another transaction"); if (!(IsToastRelation(relation) || HeapTupleHeaderIsSpeculative(tp.t_data))) elog(ERROR, "attempted to kill a non-speculative tuple"); @@ -6238,7 +6425,9 @@ heap_abort_speculative(Relation relation, ItemPointer tid) prune_xid = relfrozenxid; else prune_xid = TransactionXmin; - PageSetPrunable(page, prune_xid); + Assert(TransactionIdIsValid(prune_xid)); + heap_page_prepare_for_xid(relation, buffer, prune_xid, false); + PageSetPrunable(page, prune_xid, IsToastRelation(relation)); } /* store transaction information of xact deleting the tuple */ @@ -6248,9 +6437,12 @@ heap_abort_speculative(Relation relation, ItemPointer tid) /* * Set the tuple header xmin to InvalidTransactionId. This makes the * tuple immediately invisible everyone. (In particular, to any - * transactions waiting on the speculative token, woken up later.) + * transactions waiting on the speculative token, woken up later.) Don't + * need to reload xid base from page because InvalidTransactionId doesn't + * require xid base to be valid. */ - HeapTupleHeaderSetXmin(tp.t_data, InvalidTransactionId); + HeapTupleAndHeaderSetXmin(page, &tp, InvalidTransactionId, + IsToastRelation(relation)); /* Clear the speculative insertion token too */ tp.t_data->t_ctid = tp.t_self; @@ -6269,6 +6461,8 @@ heap_abort_speculative(Relation relation, ItemPointer tid) XLogRecPtr recptr; xlrec.flags = XLH_DELETE_IS_SUPER; + if (IsToastRelation(relation)) + xlrec.flags |= XLH_DELETE_PAGE_ON_TOAST_RELATION; xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask, tp.t_data->t_infomask2); xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self); @@ -6356,6 +6550,7 @@ heap_inplace_lock(Relation relation, HeapTupleData oldtup = *oldtup_ptr; /* minimize diff vs. 
heap_update() */ TM_Result result; bool ret; + Page page; #ifdef USE_ASSERT_CHECKING if (RelationGetRelid(relation) == RelationRelationId) @@ -6377,6 +6572,8 @@ heap_inplace_lock(Relation relation, LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + page = (Page) BufferGetPage(buffer); + HeapTupleCopyXidsFromPage(buffer, &oldtup, page, IsToastRelation(relation)); /*---------- * Interpret HeapTupleSatisfiesUpdate() like heap_update() does, except: @@ -6414,7 +6611,7 @@ heap_inplace_lock(Relation relation, TransactionId xwait; uint16 infomask; - xwait = HeapTupleHeaderGetRawXmax(oldtup.t_data); + xwait = HeapTupleGetRawXmax(&oldtup); infomask = oldtup.t_data->t_infomask; if (infomask & HEAP_XMAX_IS_MULTI) @@ -6765,7 +6962,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * been pruned away instead, since updater XID is < OldestXmin). * Just remove xmax. */ - if (TransactionIdDidCommit(update_xact)) + if (!TransactionIdDidAbort(update_xact)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("multixact %llu contains non-aborted update XID %llu from before removable cutoff %llu", @@ -6863,7 +7060,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * even member XIDs >= OldestXmin often won't be kept by second pass. */ nnewmembers = 0; - newmembers = palloc(sizeof(MultiXactMember) * nmembers); + newmembers = palloc0(sizeof(MultiXactMember) * nmembers); has_lockers = false; update_xid = InvalidTransactionId; update_committed = false; @@ -7049,7 +7246,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, * then caller had better have an exclusive lock on it already. 
*/ bool -heap_prepare_freeze_tuple(HeapTupleHeader tuple, +heap_prepare_freeze_tuple(HeapTuple htup, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen) @@ -7061,8 +7258,9 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, replace_xmax = false, freeze_xmax = false; TransactionId xid; + HeapTupleHeader tuple = htup->t_data; - frz->xmax = HeapTupleHeaderGetRawXmax(tuple); + frz->xmax = HeapTupleGetRawXmax(htup); frz->t_infomask2 = tuple->t_infomask2; frz->t_infomask = tuple->t_infomask; frz->frzflags = 0; @@ -7073,7 +7271,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * will become frozen iff our freeze plan is executed by caller (could be * neither). */ - xid = HeapTupleHeaderGetXmin(tuple); + xid = HeapTupleGetXmin(htup); if (!TransactionIdIsNormal(xid)) xmin_already_frozen = true; else @@ -7215,6 +7413,15 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, /* MultiXactId processing forces freezing (barring FRM_NOOP case) */ Assert(pagefrz->freeze_required || (!freeze_xmax && !replace_xmax)); } + else if ((tuple->t_infomask & HEAP_XMAX_INVALID) && + TransactionIdIsNormal(xid)) + { + /* + * To reset xmax without reading clog. + * This prevent excess growth of xmax. + */ + freeze_xmax = true; + } else if (TransactionIdIsNormal(xid)) { /* Raw xmax is normal XID */ @@ -7236,7 +7443,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, if (freeze_xmax && !HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) frz->checkflags |= HEAP_FREEZE_CHECK_XMAX_ABORTED; } - else if (!TransactionIdIsValid(xid)) + else if (!TransactionIdIsValid(HeapTupleGetRawXmax(htup))) { /* Raw xmax is InvalidTransactionId XID */ Assert((tuple->t_infomask & HEAP_XMAX_IS_MULTI) == 0); @@ -7306,7 +7513,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * Does this tuple force caller to freeze the entire page? 
*/ pagefrz->freeze_required = - heap_tuple_should_freeze(tuple, cutoffs, + heap_tuple_should_freeze(htup, cutoffs, &pagefrz->NoFreezePageRelfrozenXid, &pagefrz->NoFreezePageRelminMxid); } @@ -7324,7 +7531,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, * successive VACUUMs that each decide against freezing the same page. */ void -heap_pre_freeze_checks(Buffer buffer, +heap_pre_freeze_checks(Relation rel, Buffer buffer, HeapTupleFreeze *tuples, int ntuples) { Page page = BufferGetPage(buffer); @@ -7333,34 +7540,31 @@ heap_pre_freeze_checks(Buffer buffer, { HeapTupleFreeze *frz = tuples + i; ItemId itemid = PageGetItemId(page, frz->offset); - HeapTupleHeader htup; + HeapTupleData tuple; - htup = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + HeapTupleCopyXidsFromPage(buffer, &tuple, page, IsToastRelation(rel)); /* Deliberately avoid relying on tuple hint bits here */ if (frz->checkflags & HEAP_FREEZE_CHECK_XMIN_COMMITTED) { - TransactionId xmin = HeapTupleHeaderGetRawXmin(htup); + TransactionId xmin = HeapTupleGetXmin(&tuple); - Assert(!HeapTupleHeaderXminFrozen(htup)); + Assert(!HeapTupleHeaderXminFrozen(tuple.t_data)); if (unlikely(!TransactionIdDidCommit(xmin))) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("uncommitted xmin %llu needs to be frozen", (unsigned long long) xmin))); } - - /* - * TransactionIdDidAbort won't work reliably in the presence of XIDs - * left behind by transactions that were in progress during a crash, - * so we can only check that xmax didn't commit - */ if (frz->checkflags & HEAP_FREEZE_CHECK_XMAX_ABORTED) { - TransactionId xmax = HeapTupleHeaderGetRawXmax(htup); + TransactionId xmax = HeapTupleGetRawXmax(&tuple); Assert(TransactionIdIsNormal(xmax)); - if (unlikely(TransactionIdDidCommit(xmax))) + if (unlikely(!TransactionIdDidAbort(xmax))) ereport(ERROR, 
(errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("cannot freeze non-aborted xmax %llu", @@ -7377,7 +7581,7 @@ heap_pre_freeze_checks(Buffer buffer, * if needed, emits WAL. */ void -heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples) +heap_freeze_prepared_tuples(Relation rel, Buffer buffer, HeapTupleFreeze *tuples, int ntuples) { Page page = BufferGetPage(buffer); @@ -7388,7 +7592,9 @@ heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples) HeapTupleHeader htup; htup = (HeapTupleHeader) PageGetItem(page, itemid); - heap_execute_freeze_tuple(htup, frz); + heap_execute_freeze_tuple_page(page, htup, frz, + IsToastRelation(rel)); + } } @@ -7399,7 +7605,7 @@ heap_freeze_prepared_tuples(Buffer buffer, HeapTupleFreeze *tuples, int ntuples) * Useful for callers like CLUSTER that perform their own WAL logging. */ bool -heap_freeze_tuple(HeapTupleHeader tuple, +heap_freeze_tuple(HeapTuple tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff) { @@ -7576,10 +7782,10 @@ MultiXactIdGetUpdateXid(TransactionId xmax, uint16 t_infomask) * checking the hint bits. */ TransactionId -HeapTupleGetUpdateXid(const HeapTupleHeaderData *tup) +HeapTupleGetUpdateXid(const HeapTupleData *tuple) { - return MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(tup), - tup->t_infomask); + return MultiXactIdGetUpdateXid(HeapTupleGetRawXmax(tuple), + tuple->t_data->t_infomask); } /* @@ -7807,15 +8013,18 @@ ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status, * will eventually require freezing (if tuple isn't removed by pruning first). */ bool -heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) +heap_tuple_needs_eventual_freeze(HeapTuple htup) { TransactionId xid; + HeapTupleHeader tuple; + + tuple = htup->t_data; /* * If xmin is a normal transaction ID, this tuple is definitely not * frozen. 
*/ - xid = HeapTupleHeaderGetXmin(tuple); + xid = HeapTupleGetXmin(htup); if (TransactionIdIsNormal(xid)) return true; @@ -7826,13 +8035,13 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) { MultiXactId multi; - multi = HeapTupleHeaderGetRawXmax(tuple); + multi = HeapTupleGetRawXmax(htup); if (MultiXactIdIsValid(multi)) return true; } else { - xid = HeapTupleHeaderGetRawXmax(tuple); + xid = HeapTupleGetRawXmax(htup); if (TransactionIdIsNormal(xid)) return true; } @@ -7862,17 +8071,18 @@ heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple) * point that it fully commits to not freezing the tuple/page in question. */ bool -heap_tuple_should_freeze(HeapTupleHeader tuple, +heap_tuple_should_freeze(HeapTuple htup, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid) { TransactionId xid; MultiXactId multi; + HeapTupleHeader tuple = htup->t_data; bool freeze = false; /* First deal with xmin */ - xid = HeapTupleHeaderGetXmin(tuple); + xid = HeapTupleGetXmin(htup); if (TransactionIdIsNormal(xid)) { Assert(TransactionIdPrecedesOrEquals(cutoffs->relfrozenxid, xid)); @@ -7886,9 +8096,9 @@ heap_tuple_should_freeze(HeapTupleHeader tuple, xid = InvalidTransactionId; multi = InvalidMultiXactId; if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) - multi = HeapTupleHeaderGetRawXmax(tuple); + multi = HeapTupleGetRawXmax(htup); else - xid = HeapTupleHeaderGetRawXmax(tuple); + xid = HeapTupleGetRawXmax(htup); if (TransactionIdIsNormal(xid)) { @@ -7899,6 +8109,14 @@ heap_tuple_should_freeze(HeapTupleHeader tuple, if (TransactionIdPrecedes(xid, cutoffs->FreezeLimit)) freeze = true; } + else if ((tuple->t_infomask & HEAP_XMAX_INVALID) && + TransactionIdIsNormal(xid)) + { + /* + * To reset xmax without reading clog. 
+ */ + freeze = true; + } else if (!MultiXactIdIsValid(multi)) { /* xmax is a permanent XID or invalid MultiXactId/XID */ @@ -7970,14 +8188,14 @@ heap_tuple_should_freeze(HeapTupleHeader tuple, * caller's WAL record) by REDO routine when it replays caller's operation. */ void -HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, +HeapTupleHeaderAdvanceConflictHorizon(HeapTuple tuple, TransactionId *snapshotConflictHorizon) { - TransactionId xmin = HeapTupleHeaderGetXmin(tuple); - TransactionId xmax = HeapTupleHeaderGetUpdateXid(tuple); - TransactionId xvac = HeapTupleHeaderGetXvac(tuple); + TransactionId xmin = HeapTupleGetXmin(tuple); + TransactionId xmax = HeapTupleGetUpdateXidAny(tuple); + TransactionId xvac = HeapTupleHeaderGetXvac(tuple->t_data); - if (tuple->t_infomask & HEAP_MOVED) + if (tuple->t_data->t_infomask & HEAP_MOVED) { if (TransactionIdPrecedes(*snapshotConflictHorizon, xvac)) *snapshotConflictHorizon = xvac; @@ -7989,8 +8207,8 @@ HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, * * Look for a committed hint bit, or if no xmin bit is set, check clog. 
*/ - if (HeapTupleHeaderXminCommitted(tuple) || - (!HeapTupleHeaderXminInvalid(tuple) && TransactionIdDidCommit(xmin))) + if (HeapTupleHeaderXminCommitted(tuple->t_data) || + (!HeapTupleHeaderXminInvalid(tuple->t_data) && TransactionIdDidCommit(xmin))) { if (xmax != xmin && TransactionIdFollows(xmax, *snapshotConflictHorizon)) @@ -8338,7 +8556,7 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) for (;;) { ItemId lp; - HeapTupleHeader htup; + HeapTupleData htup; /* Sanity check (pure paranoia) */ if (offnum < FirstOffsetNumber) @@ -8375,16 +8593,18 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) if (!ItemIdIsNormal(lp)) break; - htup = (HeapTupleHeader) PageGetItem(page, lp); + htup.t_data = (HeapTupleHeader) PageGetItem(page, lp); + htup.t_len = ItemIdGetLength(lp); + HeapTupleCopyXidsFromPage(buf, &htup, page, IsToastRelation(rel)); /* * Check the tuple XMIN against prior XMAX, if any */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax)) + !TransactionIdEquals(HeapTupleGetXmin(&htup), priorXmax)) break; - HeapTupleHeaderAdvanceConflictHorizon(htup, + HeapTupleHeaderAdvanceConflictHorizon(&htup, &snapshotConflictHorizon); /* @@ -8393,13 +8613,13 @@ heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate) * chain (they get their own index entries) -- just move on to * next htid from index AM caller. 
*/ - if (!HeapTupleHeaderIsHotUpdated(htup)) + if (!HeapTupleHeaderIsHotUpdated(htup.t_data)) break; /* Advance to next HOT chain member */ - Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blkno); - offnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + Assert(ItemPointerGetBlockNumber(&htup.t_data->t_ctid) == blkno); + offnum = ItemPointerGetOffsetNumber(&htup.t_data->t_ctid); + priorXmax = HeapTupleGetUpdateXidAny(&htup); } /* Enable further/final shrinking of deltids for caller */ @@ -8841,6 +9061,8 @@ log_heap_update(Relation reln, Buffer oldbuf, bool all_visible_cleared, bool new_all_visible_cleared) { xl_heap_update xlrec; + TransactionId xid_base, + multi_base; xl_heap_header xlhdr; xl_heap_header xlhdr_idx; uint8 info; @@ -8949,13 +9171,13 @@ log_heap_update(Relation reln, Buffer oldbuf, /* Prepare WAL data for the old page */ xlrec.old_offnum = ItemPointerGetOffsetNumber(&oldtup->t_self); - xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data); + xlrec.old_xmax = HeapTupleGetRawXmax(oldtup); xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask, oldtup->t_data->t_infomask2); /* Prepare WAL data for the new page */ xlrec.new_offnum = ItemPointerGetOffsetNumber(&newtup->t_self); - xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data); + xlrec.new_xmax = HeapTupleGetRawXmax(newtup); bufflags = REGBUF_STANDARD; if (init) @@ -8967,6 +9189,17 @@ log_heap_update(Relation reln, Buffer oldbuf, if (oldbuf != newbuf) XLogRegisterBuffer(1, oldbuf, REGBUF_STANDARD); + if (info & XLOG_HEAP_INIT_PAGE) + { + HeapPageSpecial special = HeapPageGetSpecial(page); + + Assert(!IsToastRelation(reln)); + xid_base = special->pd_xid_base; + multi_base = special->pd_multi_base; + XLogRegisterData((char *) &xid_base, sizeof(xid_base)); + XLogRegisterData((char *) &multi_base, sizeof(multi_base)); + } + XLogRegisterData(&xlrec, SizeOfHeapUpdate); /* @@ -9079,8 +9312,8 @@ log_heap_new_cid(Relation relation, 
HeapTuple tup) { Assert(!(hdr->t_infomask & HEAP_XMAX_INVALID)); Assert(!HeapTupleHeaderXminInvalid(hdr)); - xlrec.cmin = HeapTupleHeaderGetCmin(hdr); - xlrec.cmax = HeapTupleHeaderGetCmax(hdr); + xlrec.cmin = HeapTupleGetCmin(tup); + xlrec.cmax = HeapTupleGetCmax(tup); xlrec.combocid = HeapTupleHeaderGetRawCommandId(hdr); } /* No combo CID, so only cmin or cmax can be set by this TX */ @@ -9269,14 +9502,14 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation, case HEAPTUPLE_LIVE: if (visible) return; - xid = HeapTupleHeaderGetXmin(tuple->t_data); + xid = HeapTupleGetXmin(tuple); break; case HEAPTUPLE_RECENTLY_DEAD: case HEAPTUPLE_DELETE_IN_PROGRESS: if (visible) - xid = HeapTupleHeaderGetUpdateXid(tuple->t_data); + xid = HeapTupleGetUpdateXidAny(tuple); else - xid = HeapTupleHeaderGetXmin(tuple->t_data); + xid = HeapTupleGetXmin(tuple); if (TransactionIdPrecedes(xid, TransactionXmin)) { @@ -9286,7 +9519,7 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation, } break; case HEAPTUPLE_INSERT_IN_PROGRESS: - xid = HeapTupleHeaderGetXmin(tuple->t_data); + xid = HeapTupleGetXmin(tuple); break; case HEAPTUPLE_DEAD: Assert(!visible); @@ -9324,3 +9557,511 @@ HeapCheckForSerializableConflictOut(bool visible, Relation relation, CheckForSerializableConflictOut(relation, xid, snapshot); } + +static void +xid_min_max(ShortTransactionId *min, ShortTransactionId *max, + ShortTransactionId xid, + bool *found) +{ + Assert(TransactionIdIsNormal(xid)); + Assert(xid <= MaxShortTransactionId); + + if (!*found) + { + *min = *max = xid; + *found = true; + } + else + { + *min = Min(*min, xid); + *max = Max(*max, xid); + } +} + +/* + * Find minimum and maximum short transaction ids which occurs in the page. + * + * Works for multi and non multi transaction. Which is defined by "multi" + * argument. 
+ */ +static bool +heap_page_xid_min_max(Page page, bool multi, + ShortTransactionId *min, ShortTransactionId *max, + bool is_toast) +{ + bool found; + OffsetNumber offnum, + maxoff; + ItemId itemid; + HeapTupleHeader htup; + + maxoff = PageGetMaxOffsetNumber(page); + found = false; + + Assert(!multi || !is_toast); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + if (!multi) + { + /* + * For non multi transactions we should see inside the tuple for + * update transaction. + */ + Assert(!is_toast || !(htup->t_infomask & HEAP_XMAX_IS_MULTI)); + + if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmin) && + !HeapTupleHeaderXminFrozen(htup)) + { + xid_min_max(min, max, htup->t_choice.t_heap.t_xmin, &found); + } + + if ((htup->t_infomask & HEAP_XMAX_IS_MULTI) && + (!(htup->t_infomask & HEAP_XMAX_LOCK_ONLY))) + { + TransactionId update_xid; + ShortTransactionId xid; + + Assert(!is_toast); + update_xid = MultiXactIdGetUpdateXid(HeapTupleHeaderGetRawXmax(page, htup), + htup->t_infomask); + xid = NormalTransactionIdToShort(HeapPageGetSpecial(page)->pd_xid_base, + update_xid); + + xid_min_max(min, max, xid, &found); + } + } + + if (!TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax)) + continue; + + if (multi != ((htup->t_infomask & HEAP_XMAX_IS_MULTI) != 0)) + continue; + + xid_min_max(min, max, htup->t_choice.t_heap.t_xmax, &found); + } + + Assert(!found || (*min > InvalidTransactionId && *max <= MaxShortTransactionId)); + + return found; +} + +/* + * Shift xid base in the page. WAL-logged if buffer is specified. 
+ */ +static void +heap_page_shift_base(Relation relation, Buffer buffer, Page page, + bool multi, int64 delta, bool is_toast) +{ + TransactionId *xid_base, + *multi_base; + OffsetNumber offnum, + maxoff; + ItemId itemid; + HeapTupleHeader htup; + + Assert(IsBufferLockedExclusive(buffer)); + + START_CRIT_SECTION(); + + if (is_toast) + { + Assert(!multi); + xid_base = &ToastPageGetSpecial(page)->pd_xid_base; + multi_base = NULL; + } + else + { + HeapPageSpecial special = HeapPageGetSpecial(page); + + xid_base = &special->pd_xid_base; + multi_base = &special->pd_multi_base; + } + + /* Iterate over page items */ + maxoff = PageGetMaxOffsetNumber(page); + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + /* Apply xid shift to heap tuple */ + if (!multi) + { + /* shift xmin */ + if (TransactionIdIsNormal(htup->t_choice.t_heap.t_xmin) && + !HeapTupleHeaderXminFrozen(htup)) + { + Assert(htup->t_choice.t_heap.t_xmin - delta >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmin - delta <= MaxShortTransactionId); + htup->t_choice.t_heap.t_xmin -= delta; + } + } + + /* shift xmax */ + if (!TransactionIdIsNormal(htup->t_choice.t_heap.t_xmax)) + continue; + + if (multi != (bool) (htup->t_infomask & HEAP_XMAX_IS_MULTI)) + continue; + + Assert(htup->t_choice.t_heap.t_xmax - delta >= FirstNormalTransactionId); + Assert(htup->t_choice.t_heap.t_xmax - delta <= MaxShortTransactionId); + htup->t_choice.t_heap.t_xmax -= delta; + } + + /* Apply xid shift to base as well */ + if (!multi) + *xid_base += delta; + else + *multi_base += delta; + + if (BufferIsValid(buffer)) + MarkBufferDirty(buffer); + + /* Write WAL record if needed */ + if (relation && RelationNeedsWAL(relation) && maxoff != 0) + { + XLogRecPtr recptr; + xl_heap_base_shift xlrec; + + xlrec.delta = delta; + xlrec.multi = 
multi; + xlrec.flags = 0; + if (IsToastRelation(relation)) + xlrec.flags |= XLH_BASE_SHIFT_ON_TOAST_RELATION; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHeapBaseShift); + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + + recptr = XLogInsert(RM_HEAP3_ID, XLOG_HEAP3_BASE_SHIFT); + + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); +} + +/* + * Freeze xids in the single heap page. Useful when we can't fit new xid even + * with base shift. + */ +static void +freeze_single_heap_page(Relation relation, Buffer buffer) +{ + OffsetNumber offnum; + GlobalVisState *vistest; + VacuumParams params = {0}; + struct VacuumCutoffs cutoffs = {0}; + TransactionId new_relfrozen_xid; + MultiXactId new_relmin_mxid; + PruneFreezeResult presult; + + vacuum_get_cutoffs(relation, &params, &cutoffs); + new_relfrozen_xid = cutoffs.FreezeLimit; + new_relmin_mxid = cutoffs.MultiXactCutoff; + + vistest = GlobalVisTestFor(relation); + heap_page_prune_and_freeze(relation, buffer, vistest, HEAP_PAGE_PRUNE_FREEZE, + NULL, &presult, PRUNE_ON_ACCESS, &offnum, &new_relfrozen_xid, &new_relmin_mxid, false); + + if (presult.ndeleted > presult.nnewlpdead) + pgstat_update_heap_dead_tuples(relation, + presult.ndeleted - presult.nnewlpdead); +} + +/* + * Check if xid still fits on a page with given base and delta. + */ +static inline bool +is_delta_fits_heap_page(TransactionId xid, TransactionId base, int64 delta) +{ + return xid >= base + delta + FirstNormalTransactionId && + xid <= base + delta + MaxShortTransactionId; +} + +/* + * Check if xid fits on a page with given base. + */ +static inline bool +is_xid_fits_heap_page(TransactionId xid, TransactionId base) +{ + return xid >= base + FirstNormalTransactionId && + xid <= base + MaxShortTransactionId; +} + +/* + * Check if delta fits on a page. + * + * If delta does not fit, never return. 
+ */ +static void +heap_page_check_delta(Buffer buffer, + TransactionId xid, TransactionId base, + ShortTransactionId min, ShortTransactionId max, + int64 delta, int64 *freeDelta, int64 *requiredDelta) +{ + BufferDesc *buf; + RelPathStr path; + ProcNumber backend; + + Assert((freeDelta == NULL) == (requiredDelta == NULL)); + + /* + * If delta fits the page, we good to go ... + */ + if (is_delta_fits_heap_page(xid, base, delta)) + return; + + /* + * ... otherwise handle the error. + */ + if (buffer == InvalidBuffer) + return; + + if (BufferIsLocal(buffer)) + { + buf = GetLocalBufferDescriptor(-buffer - 1); + backend = MyProcNumber; + } + else + { + buf = GetBufferDescriptor(buffer - 1); + backend = INVALID_PROC_NUMBER; + } + + path = relpathbackend(BufTagGetRelFileLocator(&buf->tag), backend, + buf->tag.forkNum); + + if (freeDelta == NULL) + elog(FATAL, "Fatal xid base calculation error: xid = %llu, base = %llu, min = %u, max = %u, delta = %lld (rel=%s, blockNum=%u)", + (unsigned long long) xid, (unsigned long long) base, + min, max, + (long long) delta, + path.str, buf->tag.blockNum); + + elog(FATAL, "Fatal xid base calculation error: xid = %llu, base = %llu, min = %u, max = %u, freeDelta = %lld, requiredDelta = %lld, delta = %lld (rel=%s, blockNum=%u)", + (unsigned long long) xid, (unsigned long long) base, + min, max, + (long long) *freeDelta, (long long) *requiredDelta, + (long long) delta, + path.str, buf->tag.blockNum); +} + +/* + * Shift page base. + */ +static void +heap_page_apply_delta(Relation relation, Buffer buffer, Page page, + TransactionId xid, bool multi, + TransactionId base, int64 delta, bool is_toast) +{ + Assert(is_delta_fits_heap_page(xid, base, delta)); + + heap_page_shift_base(relation, buffer, page, multi, delta, is_toast); + +#ifdef USE_ASSERT_CHECKING + if (is_toast) + { + Assert(!multi); + base = ToastPageGetSpecial(page)->pd_xid_base; + } + else + base = multi ? 
HeapPageGetSpecial(page)->pd_multi_base : + HeapPageGetSpecial(page)->pd_xid_base; + + Assert(is_xid_fits_heap_page(xid, base)); +#endif /* USE_ASSERT_CHECKING */ +} + +/* + * Try to fit xid on a page. + */ +static int +heap_page_try_prepare_for_xid(Relation relation, Buffer buffer, Page page, + TransactionId xid, bool multi, bool is_toast) +{ + TransactionId base; + ShortTransactionId min = InvalidTransactionId, + max = InvalidTransactionId; + int64 delta, + freeDelta, + requiredDelta; + + if (is_toast) + { + Assert(!multi); + base = ToastPageGetSpecial(page)->pd_xid_base; + } + else + base = multi ? HeapPageGetSpecial(page)->pd_multi_base : + HeapPageGetSpecial(page)->pd_xid_base; + + /* If xid fits the page no action needed. */ + if (is_xid_fits_heap_page(xid, base)) + return 0; + + /* No items on the page? */ + if (!heap_page_xid_min_max(page, multi, &min, &max, is_toast)) + { + delta = (int64) (xid - FirstNormalTransactionId) - (int64) base; + heap_page_check_delta(buffer, xid, base, min, max, delta, NULL, NULL); + heap_page_apply_delta(relation, buffer, page, xid, multi, base, delta, + is_toast); + return 0; + } + + /* Can we just shift base on the page? 
*/ + if (xid < base + FirstNormalTransactionId) + { + freeDelta = MaxShortTransactionId - max; + requiredDelta = (base + FirstNormalTransactionId) - xid; + /* Shouldn't consider setting base less than 0 */ + freeDelta = Min(freeDelta, base); + + if (requiredDelta > freeDelta) + return -1; + + delta = -(freeDelta + requiredDelta) / 2; + } + else + { + freeDelta = min - FirstNormalTransactionId; + requiredDelta = xid - (base + MaxShortTransactionId); + + if (requiredDelta > freeDelta) + return -1; + + delta = (freeDelta + requiredDelta) / 2; + } + + heap_page_check_delta(buffer, xid, base, min, max, + delta, &freeDelta, &requiredDelta); + heap_page_apply_delta(relation, buffer, page, xid, multi, base, + delta, is_toast); + + return 0; +} + +static void +heap_xlog_base_shift(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + xl_heap_base_shift *xlrec = (xl_heap_base_shift *) XLogRecGetData(record); + Buffer buffer; + Page page; + BlockNumber blkno; + RelFileLocator target_node; + + XLogRecGetBlockTag(record, 0, &target_node, NULL, &blkno); + + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) + { + page = BufferGetPage(buffer); + heap_page_shift_base(NULL, InvalidBuffer, page, xlrec->multi, + xlrec->delta, + xlrec->flags & XLH_BASE_SHIFT_ON_TOAST_RELATION); + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); +} + +/* + * Ensure that given xid fits base of given page. + */ +static bool +heap_page_prepare_for_xid(Relation relation, Buffer buffer, + TransactionId xid, bool multi) +{ + Page page = BufferGetPage(buffer); + int res; + + /* "Double xmax" page format doesn't require any preparation */ + if (HeapPageIsDoubleXmax(page)) + return false; + + if (!TransactionIdIsNormal(xid)) + return false; + + res = heap_page_try_prepare_for_xid(relation, buffer, page, xid, multi, + IsToastRelation(relation)); + if (res != -1) + return res == 1; + + /* Have to try freeing the page... 
*/ + freeze_single_heap_page(relation, buffer); + + res = heap_page_try_prepare_for_xid(relation, buffer, page, xid, multi, + IsToastRelation(relation)); + if (res != -1) + return res == 1; + + elog(ERROR, "could not fit xid into page"); + + return false; +} + +/* + * Ensure that given xid fits base of given page. + */ +void +rewrite_page_prepare_for_xid(Page page, HeapTuple tup, bool is_toast) +{ + TransactionId xid; + int res; + + /* xmin */ + xid = HeapTupleGetXmin(tup); + if (TransactionIdIsNormal(xid)) + { + res = heap_page_try_prepare_for_xid(NULL, InvalidBuffer, page, xid, + false, is_toast); + if (res == -1) + elog(ERROR, "could not fit xid into page"); + } + + /* xmax */ + xid = HeapTupleGetRawXmax(tup); + if (TransactionIdIsNormal(xid)) + { + res = heap_page_try_prepare_for_xid(NULL, InvalidBuffer, page, xid, + tup->t_data->t_infomask & HEAP_XMAX_IS_MULTI, + is_toast); + if (res == -1) + elog(ERROR, "could not fit xid into page"); + } +} + +void +heap3_redo(XLogReaderState *record) +{ + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + + switch (info & XLOG_HEAP_OPMASK) + { + case XLOG_HEAP3_BASE_SHIFT: + heap_xlog_base_shift(record); + break; + default: + elog(PANIC, "heap3_redo: unknown op code %u", info); + } +} diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 4aaeda849e4..48684b5f143 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -427,7 +427,7 @@ tuple_lock_retry: * changes in an existing tuple, except to invalid or * frozen, and neither of those can match priorXmax.) */ - if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data), + if (!TransactionIdEquals(HeapTupleGetXmin(tuple), priorXmax)) { ReleaseBuffer(buffer); @@ -487,7 +487,7 @@ tuple_lock_retry: * variable instead of doing HeapTupleHeaderGetXmin again. 
*/ if (TransactionIdIsCurrentTransactionId(priorXmax) && - HeapTupleHeaderGetCmin(tuple->t_data) >= cid) + HeapTupleGetCmin(tuple) >= cid) { tmfd->xmax = priorXmax; @@ -495,7 +495,7 @@ tuple_lock_retry: * Cmin is the problematic value, so store that. See * above. */ - tmfd->cmax = HeapTupleHeaderGetCmin(tuple->t_data); + tmfd->cmax = HeapTupleGetCmin(tuple); ReleaseBuffer(buffer); return TM_SelfModified; } @@ -521,7 +521,7 @@ tuple_lock_retry: /* * As above, if xmin isn't what we're expecting, do nothing. */ - if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple->t_data), + if (!TransactionIdEquals(HeapTupleGetXmin(tuple), priorXmax)) { ReleaseBuffer(buffer); @@ -552,7 +552,7 @@ tuple_lock_retry: /* updated, so look at the updated row */ *tid = tuple->t_data->t_ctid; /* updated row should have xmin matching this xmax */ - priorXmax = HeapTupleHeaderGetUpdateXid(tuple->t_data); + priorXmax = HeapTupleGetUpdateXidAny(tuple); ReleaseBuffer(buffer); /* loop back to fetch next in chain */ } @@ -863,7 +863,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, * case we had better copy it. */ if (!is_system_catalog && - !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data))) + !TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(tuple))) elog(WARNING, "concurrent insert in progress within table \"%s\"", RelationGetRelationName(OldHeap)); /* treat as live */ @@ -875,7 +875,7 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, * Similar situation to INSERT_IN_PROGRESS case. 
*/ if (!is_system_catalog && - !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple->t_data))) + !TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(tuple))) elog(WARNING, "concurrent delete in progress within table \"%s\"", RelationGetRelationName(OldHeap)); /* treat as recently dead */ @@ -1068,6 +1068,8 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, targtuple->t_tableOid = RelationGetRelid(scan->rs_rd); targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid); targtuple->t_len = ItemIdGetLength(itemid); + HeapTupleCopyXidsFromPage(hscan->rs_cbuf, targtuple, targpage, + IsToastRelation(scan->rs_rd)); switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin, hscan->rs_cbuf)) @@ -1103,7 +1105,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, * numbers we report to the cumulative stats system to make * this come out right.) */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(targtuple->t_data))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(targtuple))) { sample_it = true; *liverows += 1; @@ -1134,7 +1136,7 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, * but not the post-image. We also get sane results if the * concurrent transaction never commits. */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(targtuple->t_data))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(targtuple))) *deadrows += 1; else { @@ -1383,7 +1385,8 @@ heapam_index_build_range_scan(Relation heapRelation, Page page = BufferGetPage(hscan->rs_cbuf); LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - heap_get_root_tuples(page, root_offsets); + heap_get_root_tuples(heapRelation, hscan->rs_cbuf, page, + root_offsets); LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); root_blkno = hscan->rs_cblock; @@ -1476,7 +1479,7 @@ heapam_index_build_range_scan(Relation heapRelation, * before commit there. 
Give a warning if neither case * applies. */ - xwait = HeapTupleHeaderGetXmin(heapTuple->t_data); + xwait = HeapTupleGetXmin(heapTuple); if (!TransactionIdIsCurrentTransactionId(xwait)) { if (!is_system_catalog) @@ -1535,7 +1538,7 @@ heapam_index_build_range_scan(Relation heapRelation, break; } - xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data); + xwait = HeapTupleGetUpdateXidAny(heapTuple); if (!TransactionIdIsCurrentTransactionId(xwait)) { if (!is_system_catalog) @@ -1680,7 +1683,8 @@ heapam_index_build_range_scan(Relation heapRelation, Page page = BufferGetPage(hscan->rs_cbuf); LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - heap_get_root_tuples(page, root_offsets); + heap_get_root_tuples(heapRelation, hscan->rs_cbuf, page, + root_offsets); LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); } @@ -1846,7 +1850,8 @@ heapam_index_validate_scan(Relation heapRelation, Page page = BufferGetPage(hscan->rs_cbuf); LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - heap_get_root_tuples(page, root_offsets); + heap_get_root_tuples(heapRelation, hscan->rs_cbuf, page, + root_offsets); LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); memset(in_index, 0, sizeof(in_index)); @@ -2179,6 +2184,8 @@ heapam_scan_bitmap_next_tuple(TableScanDesc scan, hscan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem(page, lp); hscan->rs_ctup.t_len = ItemIdGetLength(lp); hscan->rs_ctup.t_tableOid = scan->rs_rd->rd_id; + hscan->rs_ctup.t_xmin = hscan->rs_xmin[hscan->rs_cindex]; + hscan->rs_ctup.t_xmax = hscan->rs_xmax[hscan->rs_cindex]; ItemPointerSet(&hscan->rs_ctup.t_self, hscan->rs_cblock, targoffset); pgstat_count_heap_fetch(scan->rs_rd); @@ -2335,8 +2342,17 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple->t_len = ItemIdGetLength(itemid); - ItemPointerSet(&(tuple->t_self), blockno, tupoffset); + if (pagemode) + { + tuple->t_xmin = InvalidTransactionId; + tuple->t_xmax = InvalidTransactionId; + } 
+ else + HeapTupleCopyXidsFromPage(hscan->rs_cbuf, tuple, page, + IsToastRelation(scan->rs_rd)); + + ItemPointerSet(&(tuple->t_self), blockno, tupoffset); if (all_visible) visible = true; @@ -2592,7 +2608,12 @@ BitmapHeapScanNextBlock(TableScanDesc scan, ItemPointerSet(&tid, block, offnum); if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot, &heapTuple, NULL, true)) - hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid); + { + hscan->rs_vistuples[ntup] = ItemPointerGetOffsetNumber(&tid); + hscan->rs_xmin[ntup] = heapTuple.t_xmin; + hscan->rs_xmax[ntup] = heapTuple.t_xmax; + ++ntup; + } } } else @@ -2617,13 +2638,18 @@ BitmapHeapScanNextBlock(TableScanDesc scan, loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp); loctup.t_len = ItemIdGetLength(lp); loctup.t_tableOid = scan->rs_rd->rd_id; + HeapTupleCopyXidsFromPage(hscan->rs_cbuf, &loctup, page, + IsToastRelation(scan->rs_rd)); ItemPointerSet(&loctup.t_self, block, offnum); valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer); if (valid) { - hscan->rs_vistuples[ntup++] = offnum; + hscan->rs_vistuples[ntup] = offnum; + hscan->rs_xmin[ntup] = loctup.t_xmin; + hscan->rs_xmax[ntup] = loctup.t_xmax; + ++ntup; PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot, - HeapTupleHeaderGetXmin(loctup.t_data)); + HeapTupleGetXmin(&loctup)); } HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup, buffer, snapshot); diff --git a/src/backend/access/heap/heapam_visibility.c b/src/backend/access/heap/heapam_visibility.c index 05f6946fe60..ecb16f954ec 100644 --- a/src/backend/access/heap/heapam_visibility.c +++ b/src/backend/access/heap/heapam_visibility.c @@ -218,7 +218,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; @@ 
-230,7 +230,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -242,7 +242,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) return false; } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -252,11 +252,11 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) return false; } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) return false; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -285,7 +285,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -300,17 +300,17 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) return true; } - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; return false; } - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return true; - if 
(!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -328,7 +328,7 @@ HeapTupleSatisfiesSelf(HeapTuple htup, Snapshot snapshot, Buffer buffer) } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); return false; } @@ -417,7 +417,7 @@ HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot, * is canceled by super-deleting the tuple. This also applies to * TOAST tuples created during speculative insertion. */ - else if (!TransactionIdIsValid(HeapTupleHeaderGetXmin(tuple))) + else if (!TransactionIdIsValid(HeapTupleGetXmin(htup))) return false; } @@ -507,9 +507,9 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { - if (HeapTupleHeaderGetCmin(tuple) >= curcid) + if (HeapTupleGetCmin(htup) >= curcid) return TM_Invisible; /* inserted after scan started */ if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ @@ -519,7 +519,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, { TransactionId xmax; - xmax = HeapTupleHeaderGetRawXmax(tuple); + xmax = HeapTupleGetRawXmax(htup); /* * Careful here: even though this tuple was created by our own @@ -550,7 +550,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -558,21 +558,21 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, /* deleting subtransaction must have aborted */ if (!TransactionIdIsCurrentTransactionId(xmax)) { - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), + if 
(MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return TM_BeingModified; return TM_Ok; } else { - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -580,16 +580,16 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, return TM_Ok; } - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) return TM_Invisible; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -623,17 +623,17 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) { - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), true)) + if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), true)) return TM_BeingModified; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); return TM_Ok; } - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); if (!TransactionIdIsValid(xmax)) { - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return TM_BeingModified; } @@ -642,13 +642,13 @@ 
HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, if (TransactionIdIsCurrentTransactionId(xmax)) { - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } - if (MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + if (MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) return TM_BeingModified; if (TransactionIdDidCommit(xmax)) @@ -664,7 +664,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, * what about the other members? */ - if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + if (!MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) { /* * There's no member, even just a locker, alive anymore, so we can @@ -681,20 +681,20 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, } } - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return TM_BeingModified; - if (HeapTupleHeaderGetCmax(tuple) >= curcid) + if (HeapTupleGetCmax(htup) >= curcid) return TM_SelfModified; /* updated after scan started */ else return TM_Invisible; /* updated before scan started */ } - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return TM_BeingModified; - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -712,7 +712,7 @@ HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid, } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); if (!ItemPointerEquals(&htup->t_self, &tuple->t_ctid)) return TM_Updated; /* updated by other */ else @@ 
-795,7 +795,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return true; @@ -807,7 +807,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -819,7 +819,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, return false; } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -829,7 +829,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, return false; } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) { /* * Return the speculative token to caller. Caller can worry about @@ -845,13 +845,13 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, Assert(snapshot->speculativeToken != 0); } - snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple); + snapshot->xmin = HeapTupleGetRawXmin(htup); /* XXX shouldn't we fall through to look at xmax? 
*/ return true; /* in insertion by other */ } - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -880,7 +880,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -898,21 +898,21 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, return true; } - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) return true; return false; } - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) { if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)) - snapshot->xmax = HeapTupleHeaderGetRawXmax(tuple); + snapshot->xmax = HeapTupleGetRawXmax(htup); return true; } - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -930,7 +930,7 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot, } SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); return false; /* updated by other */ } @@ -1018,9 +1018,9 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, } } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { - if (HeapTupleHeaderGetCmin(tuple) >= 
snapshot->curcid) + if (HeapTupleGetCmin(htup) >= snapshot->curcid) return false; /* inserted after scan started */ if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ @@ -1033,7 +1033,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, { TransactionId xmax; - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -1041,13 +1041,13 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, /* updating subtransaction must have aborted */ if (!TransactionIdIsCurrentTransactionId(xmax)) return true; - else if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + else if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* updated after scan started */ else return false; /* updated before scan started */ } - if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { /* deleting subtransaction must have aborted */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -1055,16 +1055,16 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, return true; } - if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* deleted after scan started */ else return false; /* deleted before scan started */ } - else if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot)) + else if (XidInMVCCSnapshot(HeapTupleGetRawXmin(htup), snapshot)) return false; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* it must have aborted or crashed */ @@ -1077,7 +1077,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, { /* xmin is committed, but maybe not according to our snapshot */ if 
(!HeapTupleHeaderXminFrozen(tuple) && - XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot)) + XidInMVCCSnapshot(HeapTupleGetRawXmin(htup), snapshot)) return false; /* treat as still in progress */ } @@ -1096,14 +1096,14 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, /* already checked above */ Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); if (TransactionIdIsCurrentTransactionId(xmax)) { - if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* deleted after scan started */ else return false; /* deleted before scan started */ @@ -1118,18 +1118,18 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmax(htup))) { - if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid) + if (HeapTupleGetCmax(htup) >= snapshot->curcid) return true; /* deleted after scan started */ else return false; /* deleted before scan started */ } - if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot)) + if (XidInMVCCSnapshot(HeapTupleGetRawXmax(htup), snapshot)) return true; - if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) { /* it must have aborted or crashed */ SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, @@ -1139,12 +1139,12 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot, /* xmax transaction committed */ SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); } else { /* xmax is committed, but maybe not according to our snapshot */ - if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), 
snapshot)) + if (XidInMVCCSnapshot(HeapTupleGetRawXmax(htup), snapshot)) return true; /* treat as still in progress */ } @@ -1259,21 +1259,21 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de return HEAPTUPLE_DEAD; } } - else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(htup))) { if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ return HEAPTUPLE_INSERT_IN_PROGRESS; /* only locked? run infomask-only check first, for performance */ if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) || - HeapTupleHeaderIsOnlyLocked(tuple)) + HeapTupleIsOnlyLocked(htup)) return HEAPTUPLE_INSERT_IN_PROGRESS; /* inserted and then deleted by same xact */ - if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tuple))) + if (TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(htup))) return HEAPTUPLE_DELETE_IN_PROGRESS; /* deleting subtransaction must have aborted */ return HEAPTUPLE_INSERT_IN_PROGRESS; } - else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdIsInProgress(HeapTupleGetRawXmin(htup))) { /* * It'd be possible to discern between INSERT/DELETE in progress @@ -1285,9 +1285,9 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de */ return HEAPTUPLE_INSERT_IN_PROGRESS; } - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmin(htup))) SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED, - HeapTupleHeaderGetRawXmin(tuple)); + HeapTupleGetRawXmin(htup)); else { /* @@ -1329,14 +1329,14 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de * possibly be running; otherwise have to check. 
*/ if (!HEAP_LOCKED_UPGRADED(tuple->t_infomask) && - MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), + MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), true)) return HEAPTUPLE_LIVE; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); } else { - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return HEAPTUPLE_LIVE; SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId); @@ -1354,7 +1354,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { - TransactionId xmax = HeapTupleGetUpdateXid(tuple); + TransactionId xmax = HeapTupleGetUpdateXid(htup); /* already checked above */ Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask)); @@ -1377,7 +1377,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de *dead_after = xmax; return HEAPTUPLE_RECENTLY_DEAD; } - else if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple), false)) + else if (!MultiXactIdIsRunning(HeapTupleGetRawXmax(htup), false)) { /* * Not in Progress, Not Committed, so either Aborted or crashed. @@ -1391,11 +1391,11 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) { - if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple))) + if (TransactionIdIsInProgress(HeapTupleGetRawXmax(htup))) return HEAPTUPLE_DELETE_IN_PROGRESS; - else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple))) + else if (TransactionIdDidCommit(HeapTupleGetRawXmax(htup))) SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); else { /* @@ -1417,7 +1417,7 @@ HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer, TransactionId *de * Deleter committed, allow caller to check if it was recent enough that * some open transactions could still see the tuple. 
*/ - *dead_after = HeapTupleHeaderGetRawXmax(tuple); + *dead_after = HeapTupleGetRawXmax(htup); return HEAPTUPLE_RECENTLY_DEAD; } @@ -1513,7 +1513,7 @@ HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest) /* Deleter committed, so tuple is dead if the XID is old enough. */ return GlobalVisTestIsRemovableXid(vistest, - HeapTupleHeaderGetRawXmax(tuple)); + HeapTupleGetRawXmax(htup)); } /* @@ -1526,8 +1526,9 @@ HeapTupleIsSurelyDead(HeapTuple htup, GlobalVisState *vistest) * at the top of this file. */ bool -HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) +HeapTupleIsOnlyLocked(HeapTuple htup) { + HeapTupleHeader tuple = htup->t_data; TransactionId xmax; /* if there's no valid Xmax, then there's obviously no update either */ @@ -1538,7 +1539,7 @@ HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) return true; /* invalid xmax means no update */ - if (!TransactionIdIsValid(HeapTupleHeaderGetRawXmax(tuple))) + if (!TransactionIdIsValid(HeapTupleGetRawXmax(htup))) return true; /* @@ -1549,7 +1550,7 @@ HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple) return false; /* ... but if it's a multi, then perhaps the updating Xid aborted. 
*/ - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); /* not LOCKED_ONLY, so it has to have an xmax */ Assert(TransactionIdIsValid(xmax)); @@ -1597,8 +1598,8 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, Buffer buffer) { HeapTupleHeader tuple = htup->t_data; - TransactionId xmin = HeapTupleHeaderGetXmin(tuple); - TransactionId xmax = HeapTupleHeaderGetRawXmax(tuple); + TransactionId xmin = HeapTupleGetXmin(htup); + TransactionId xmax = HeapTupleGetRawXmax(htup); Assert(ItemPointerIsValid(&htup->t_self)); Assert(htup->t_tableOid != InvalidOid); @@ -1698,7 +1699,7 @@ HeapTupleSatisfiesHistoricMVCC(HeapTuple htup, Snapshot snapshot, */ else if (tuple->t_infomask & HEAP_XMAX_IS_MULTI) { - xmax = HeapTupleGetUpdateXid(tuple); + xmax = HeapTupleGetUpdateXid(htup); } /* check if it's one of our txids, toplevel is also in there */ diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c index 30f4c2d3c67..768bd6ad53d 100644 --- a/src/backend/access/heap/heapam_xlog.c +++ b/src/backend/access/heap/heapam_xlog.c @@ -106,7 +106,10 @@ heap_xlog_prune_freeze(XLogReaderState *record) (xlrec.flags & XLHP_CLEANUP_LOCK) == 0, redirected, nredirected, nowdead, ndead, - nowunused, nunused); + nowunused, nunused, + (xlrec.flags & XLHP_REPAIR_FRAGMENTATION) != 0, + (xlrec.flags & XLHP_ON_TOAST_RELATION) != 0); + /* Freeze tuples */ for (int p = 0; p < nplans; p++) @@ -127,11 +130,14 @@ heap_xlog_prune_freeze(XLogReaderState *record) { OffsetNumber offset = *(frz_offsets++); ItemId lp; - HeapTupleHeader tuple; + HeapTupleData tp; lp = PageGetItemId(page, offset); - tuple = (HeapTupleHeader) PageGetItem(page, lp); - heap_execute_freeze_tuple(tuple, &frz); + tp.t_data = (HeapTupleHeader) PageGetItem(page, lp); + tp.t_len = ItemIdGetLength(lp); + HeapTupleCopyXidsFromPage(buffer, &tp, page, + (xlrec.flags & XLHP_ON_TOAST_RELATION) != 0); + heap_execute_freeze_tuple(&tp, &frz); } } @@ -371,6 +377,8 @@ 
heap_xlog_delete(XLogReaderState *record) if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { + HeapTupleData tuple; + page = BufferGetPage(buffer); if (PageGetMaxOffsetNumber(page) >= xlrec->offnum) @@ -386,14 +394,19 @@ heap_xlog_delete(XLogReaderState *record) HeapTupleHeaderClearHotUpdated(htup); fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, &htup->t_infomask2); + tuple.t_data = htup; if (!(xlrec->flags & XLH_DELETE_IS_SUPER)) - HeapTupleHeaderSetXmax(htup, xlrec->xmax); + HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->xmax, + (xlrec->flags & XLH_DELETE_PAGE_ON_TOAST_RELATION) != 0); else - HeapTupleHeaderSetXmin(htup, InvalidTransactionId); + HeapTupleAndHeaderSetXmin(page, &tuple, InvalidTransactionId, + (xlrec->flags & XLH_DELETE_PAGE_ON_TOAST_RELATION) != 0); + HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Mark the page as a candidate for pruning */ - PageSetPrunable(page, XLogRecGetXid(record)); + PageSetPrunable(page, XLogRecGetXid(record), + (xlrec->flags & XLH_DELETE_PAGE_ON_TOAST_RELATION) != 0); if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); @@ -417,7 +430,7 @@ static void heap_xlog_insert(XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; - xl_heap_insert *xlrec = (xl_heap_insert *) XLogRecGetData(record); + xl_heap_insert *xlrec; Buffer buffer; Page page; union @@ -433,6 +446,20 @@ heap_xlog_insert(XLogReaderState *record) BlockNumber blkno; ItemPointerData target_tid; XLogRedoAction action; + bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(record); + TransactionId xid_base = InvalidTransactionId; + TransactionId multi_base = InvalidTransactionId; + + if (isinit) + { + xid_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + multi_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + } + + xlrec = (xl_heap_insert *) rec_data; XLogRecGetBlockTag(record, 
0, &target_locator, NULL, &blkno); ItemPointerSetBlockNumber(&target_tid, blkno); @@ -457,11 +484,28 @@ heap_xlog_insert(XLogReaderState *record) * If we inserted the first and only tuple on the page, re-initialize the * page from scratch. */ - if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) + if (isinit) { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); - PageInit(page, BufferGetPageSize(buffer), 0); + + if (xlrec->flags & XLH_INSERT_ON_TOAST_RELATION) + { + PageInit(page, BufferGetPageSize(buffer), + sizeof(ToastPageSpecialData)); + ToastPageGetSpecial(page)->pd_xid_base = xid_base; + } + else + { + HeapPageSpecial special; + + PageInit(page, BufferGetPageSize(buffer), + sizeof(HeapPageSpecialData)); + special = HeapPageGetSpecial(page); + special->pd_xid_base = xid_base; + special->pd_multi_base = multi_base; + } + action = BLK_NEEDS_REDO; } else @@ -470,6 +514,7 @@ heap_xlog_insert(XLogReaderState *record) { Size datalen; char *data; + HeapTupleData tuple; page = BufferGetPage(buffer); @@ -493,7 +538,9 @@ heap_xlog_insert(XLogReaderState *record) htup->t_infomask2 = xlhdr.t_infomask2; htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; - HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); + tuple.t_data = htup; + HeapTupleAndHeaderSetXmin(page, &tuple, XLogRecGetXid(record), + (xlrec->flags & XLH_INSERT_ON_TOAST_RELATION) != 0); HeapTupleHeaderSetCmin(htup, FirstCommandId); htup->t_ctid = target_tid; @@ -553,12 +600,22 @@ heap_xlog_multi_insert(XLogReaderState *record) int i; bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; XLogRedoAction action; + TransactionId xid_base = InvalidTransactionId, + multi_base = InvalidTransactionId; + Pointer rec_data = (Pointer) XLogRecGetData(record); /* * Insertion doesn't overwrite MVCC data, so no conflict processing is * required. 
*/ - xlrec = (xl_heap_multi_insert *) XLogRecGetData(record); + if (isinit) + { + xid_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + multi_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + } + xlrec = (xl_heap_multi_insert *) rec_data; XLogRecGetBlockTag(record, 0, &rlocator, NULL, &blkno); @@ -585,7 +642,22 @@ heap_xlog_multi_insert(XLogReaderState *record) { buffer = XLogInitBufferForRedo(record, 0); page = BufferGetPage(buffer); - PageInit(page, BufferGetPageSize(buffer), 0); + + if ((xlrec->flags & XLH_INSERT_ON_TOAST_RELATION) != 0) + { + PageInit(page, BufferGetPageSize(buffer), sizeof(ToastPageSpecialData)); + ToastPageGetSpecial(page)->pd_xid_base = xid_base; + } + else + { + HeapPageSpecial special; + + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + special = HeapPageGetSpecial(page); + special->pd_xid_base = xid_base; + special->pd_multi_base = multi_base; + } + action = BLK_NEEDS_REDO; } else @@ -606,6 +678,7 @@ heap_xlog_multi_insert(XLogReaderState *record) { OffsetNumber offnum; xl_multi_insert_tuple *xlhdr; + HeapTupleData tuple; /* * If we're reinitializing the page, the tuples are stored in @@ -636,7 +709,9 @@ heap_xlog_multi_insert(XLogReaderState *record) htup->t_infomask2 = xlhdr->t_infomask2; htup->t_infomask = xlhdr->t_infomask; htup->t_hoff = xlhdr->t_hoff; - HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); + tuple.t_data = htup; + HeapTupleAndHeaderSetXmin(page, &tuple, XLogRecGetXid(record), + false); HeapTupleHeaderSetCmin(htup, FirstCommandId); ItemPointerSetBlockNumber(&htup->t_ctid, blkno); ItemPointerSetOffsetNumber(&htup->t_ctid, offnum); @@ -684,8 +759,8 @@ static void heap_xlog_update(XLogReaderState *record, bool hot_update) { XLogRecPtr lsn = record->EndRecPtr; - xl_heap_update *xlrec = (xl_heap_update *) XLogRecGetData(record); RelFileLocator rlocator; + xl_heap_update *xlrec; BlockNumber oldblk; BlockNumber newblk; ItemPointerData newtid; @@ 
-709,6 +784,20 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) Size freespace = 0; XLogRedoAction oldaction; XLogRedoAction newaction; + bool isinit = (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(record); + TransactionId xid_base = InvalidTransactionId, + multi_base = InvalidTransactionId; + + if (isinit) + { + xid_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + multi_base = *((TransactionId *) rec_data); + rec_data += sizeof(TransactionId); + } + + xlrec = (xl_heap_update *) rec_data; /* initialize to keep the compiler quiet */ oldtup.t_data = NULL; @@ -755,6 +844,8 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) &obuffer); if (oldaction == BLK_NEEDS_REDO) { + HeapTupleData tuple; + page = BufferGetPage(obuffer); offnum = xlrec->old_offnum; if (PageGetMaxOffsetNumber(page) >= offnum) @@ -767,6 +858,8 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) oldtup.t_data = htup; oldtup.t_len = ItemIdGetLength(lp); + /* Toast tuples are never updated. */ + HeapTupleCopyXidsFromPage(obuffer, &oldtup, page, false); htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED); htup->t_infomask2 &= ~HEAP_KEYS_UPDATED; @@ -776,13 +869,15 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) HeapTupleHeaderClearHotUpdated(htup); fix_infomask_from_infobits(xlrec->old_infobits_set, &htup->t_infomask, &htup->t_infomask2); - HeapTupleHeaderSetXmax(htup, xlrec->old_xmax); + tuple.t_data = htup; + HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->old_xmax, false); HeapTupleHeaderSetCmax(htup, FirstCommandId, false); /* Set forward chain link in t_ctid */ htup->t_ctid = newtid; /* Mark the page as a candidate for pruning */ - PageSetPrunable(page, XLogRecGetXid(record)); + /* Toast tuples are never updated. 
*/ + PageSetPrunable(page, XLogRecGetXid(record), false); if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); @@ -799,11 +894,18 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) nbuffer = obuffer; newaction = oldaction; } - else if (XLogRecGetInfo(record) & XLOG_HEAP_INIT_PAGE) + else if (isinit) { + HeapPageSpecial special; + nbuffer = XLogInitBufferForRedo(record, 0); page = (Page) BufferGetPage(nbuffer); - PageInit(page, BufferGetPageSize(nbuffer), 0); + + /* Toast tuples are never updated. */ + PageInit(page, BufferGetPageSize(nbuffer), sizeof(HeapPageSpecialData)); + special = HeapPageGetSpecial(page); + special->pd_xid_base = xid_base; + special->pd_multi_base = multi_base; newaction = BLK_NEEDS_REDO; } else @@ -831,6 +933,7 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) char *recdata_end; Size datalen; Size tuplen; + HeapTupleData tuple; recdata = XLogRecGetBlockData(record, 0, &datalen); recdata_end = recdata + datalen; @@ -909,9 +1012,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) htup->t_infomask = xlhdr.t_infomask; htup->t_hoff = xlhdr.t_hoff; - HeapTupleHeaderSetXmin(htup, XLogRecGetXid(record)); + tuple.t_data = htup; + HeapTupleAndHeaderSetXmin(page, &tuple, XLogRecGetXid(record), false); HeapTupleHeaderSetCmin(htup, FirstCommandId); - HeapTupleHeaderSetXmax(htup, xlrec->new_xmax); + HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->new_xmax, false); /* Make sure there is no forward chain link in t_ctid */ htup->t_ctid = newtid; @@ -1028,6 +1132,8 @@ heap_xlog_lock(XLogReaderState *record) if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { + HeapTupleData tuple; + page = (Page) BufferGetPage(buffer); offnum = xlrec->offnum; @@ -1056,7 +1162,9 @@ heap_xlog_lock(XLogReaderState *record) BufferGetBlockNumber(buffer), offnum); } - HeapTupleHeaderSetXmax(htup, xlrec->xmax); + + tuple.t_data = htup; + HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->xmax, false); 
HeapTupleHeaderSetCmax(htup, FirstCommandId, false); PageSetLSN(page, lsn); MarkBufferDirty(buffer); @@ -1104,6 +1212,8 @@ heap_xlog_lock_updated(XLogReaderState *record) if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) { + HeapTupleData tuple; + page = BufferGetPage(buffer); offnum = xlrec->offnum; @@ -1119,7 +1229,8 @@ heap_xlog_lock_updated(XLogReaderState *record) htup->t_infomask2 &= ~HEAP_KEYS_UPDATED; fix_infomask_from_infobits(xlrec->infobits_set, &htup->t_infomask, &htup->t_infomask2); - HeapTupleHeaderSetXmax(htup, xlrec->xmax); + tuple.t_data = htup; + HeapTupleAndHeaderSetXmax(page, &tuple, xlrec->xmax, false); PageSetLSN(page, lsn); MarkBufferDirty(buffer); @@ -1272,6 +1383,10 @@ heap_mask(char *pagedata, BlockNumber blkno) mask_page_lsn_and_checksum(page); mask_page_hint_bits(page); + + /* Ignore prune_xid (it's like a hint-bit) */ + HeapPageSetPruneXid(page, InvalidTransactionId, false); + mask_unused_space(page); for (off = 1; off <= PageGetMaxOffsetNumber(page); off++) diff --git a/src/backend/access/heap/heaptoast.c b/src/backend/access/heap/heaptoast.c index cb1e57030f6..f1bbe9980b4 100644 --- a/src/backend/access/heap/heaptoast.c +++ b/src/backend/access/heap/heaptoast.c @@ -307,6 +307,7 @@ heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, result_tuple->t_len = new_tuple_len; result_tuple->t_self = newtup->t_self; result_tuple->t_tableOid = newtup->t_tableOid; + HeapTupleCopyXids(result_tuple, newtup); new_data = (HeapTupleHeader) ((char *) result_tuple + HEAPTUPLESIZE); result_tuple->t_data = new_data; @@ -395,6 +396,7 @@ toast_flatten_tuple(HeapTuple tup, TupleDesc tupleDesc) */ new_tuple->t_self = tup->t_self; new_tuple->t_tableOid = tup->t_tableOid; + HeapTupleCopyXids(new_tuple, tup); new_tuple->t_data->t_choice = tup->t_data->t_choice; new_tuple->t_data->t_ctid = tup->t_data->t_ctid; @@ -467,6 +469,7 @@ toast_flatten_tuple_to_datum(HeapTupleHeader tup, ItemPointerSetInvalid(&(tmptup.t_self)); 
tmptup.t_tableOid = InvalidOid; tmptup.t_data = tup; + HeapTupleSetZeroXids(&tmptup); /* * Break down the tuple into fields. diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index c482c9d61b2..ef0899fbe40 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -19,6 +19,7 @@ #include "access/hio.h" #include "access/htup_details.h" #include "access/visibilitymap.h" +#include "catalog/catalog.h" #include "storage/bufmgr.h" #include "storage/freespace.h" #include "storage/lmgr.h" @@ -58,6 +59,9 @@ RelationPutHeapTuple(Relation relation, /* Add the tuple to the page */ pageHeader = BufferGetPage(buffer); + HeapTupleHeaderStoreXmin(pageHeader, tuple, IsToastRelation(relation)); + HeapTupleHeaderStoreXmax(pageHeader, tuple, IsToastRelation(relation)); + offnum = PageAddItem(pageHeader, (Item) tuple->t_data, tuple->t_len, InvalidOffsetNumber, false, true); @@ -360,7 +364,17 @@ RelationAddBlocks(Relation relation, BulkInsertState bistate, first_block, RelationGetRelationName(relation)); - PageInit(page, BufferGetPageSize(buffer), 0); + if (IsToastRelation(relation)) + { + PageInit(page, BufferGetPageSize(buffer), sizeof(ToastPageSpecialData)); + ToastPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; + } + else + { + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; + } + MarkBufferDirty(buffer); /* @@ -393,7 +407,7 @@ RelationAddBlocks(Relation relation, BulkInsertState bistate, if (use_fsm && i >= not_in_fsm_pages) { Size freespace = BufferGetPageSize(victim_buffers[i]) - - SizeOfPageHeaderData; + SizeOfPageHeaderData - MAXALIGN(sizeof(HeapPageSpecialData)); RecordPageWithFreeSpace(relation, curBlock, freespace); } @@ -684,6 +698,9 @@ loop: /* * Now we can check to see if there's enough free space here. If so, * we're done. 
+ * + * "Double xmax" page is not suitable for any new tuple, since xmin + * can't be set there. */ page = BufferGetPage(buffer); @@ -695,12 +712,23 @@ loop: */ if (PageIsNew(page)) { - PageInit(page, BufferGetPageSize(buffer), 0); + if (IsToastRelation(relation)) + { + PageInit(page, BufferGetPageSize(buffer), sizeof(ToastPageSpecialData)); + ToastPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; + } + else + { + PageInit(page, BufferGetPageSize(buffer), sizeof(HeapPageSpecialData)); + HeapPageGetSpecial(page)->pd_xid_base = RecentXmin - FirstNormalTransactionId; + } + MarkBufferDirty(buffer); } pageFreeSpace = PageGetHeapFreeSpace(page); - if (targetFreeSpace <= pageFreeSpace) + if (targetFreeSpace <= pageFreeSpace && + !HeapPageIsDoubleXmax(page)) { /* use this page as future insert target, too */ RelationSetTargetBlock(relation, targetBlock); diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index a8025889be0..fa31e6432c8 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -157,7 +157,7 @@ static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer); static inline HTSV_Result htsv_get_valid_status(int status); -static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, +static void heap_prune_chain(Relation relation, Buffer buffer, Page page, BlockNumber blockno, OffsetNumber maxoff, OffsetNumber rootoffnum, PruneState *prstate); static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid); static void heap_prune_record_redirect(PruneState *prstate, @@ -170,12 +170,23 @@ static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber o static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal); static void heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumber offnum); -static void 
heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum); +static void heap_prune_record_unchanged_lp_normal(Relation relation, Buffer buffer, Page page, PruneState *prstate, OffsetNumber offnum); static void heap_prune_record_unchanged_lp_dead(Page page, PruneState *prstate, OffsetNumber offnum); static void heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum); static void page_verify_redirects(Page page); +static inline bool +XidFitsPage(Page page, TransactionId xid, bool is_toast) +{ + TransactionId base; + + base = is_toast ? ToastPageGetSpecial(page)->pd_xid_base : + HeapPageGetSpecial(page)->pd_xid_base; + + return xid >= base + FirstNormalTransactionId && + xid <= base + MaxShortTransactionId; +} /* * Optionally prune and repair fragmentation in the specified page. @@ -210,7 +221,8 @@ heap_page_prune_opt(Relation relation, Buffer buffer) * determining the appropriate horizon is a waste if there's no prune_xid * (i.e. no updates/deletes left potentially dead tuples around). */ - prune_xid = ((PageHeader) page)->pd_prune_xid; + prune_xid = HeapPageGetPruneXidNoAssert(page, IsToastRelation(relation)); + if (!TransactionIdIsValid(prune_xid)) return; @@ -261,7 +273,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer) * that during on-access pruning with the current implementation. */ heap_page_prune_and_freeze(relation, buffer, vistest, 0, - NULL, &presult, PRUNE_ON_ACCESS, &dummy_off_loc, NULL, NULL); + NULL, &presult, PRUNE_ON_ACCESS, &dummy_off_loc, NULL, NULL, false); /* * Report the number of tuples reclaimed to pgstats. 
This is @@ -355,7 +367,8 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, - MultiXactId *new_relmin_mxid) + MultiXactId *new_relmin_mxid, + bool repairFragmentation) { Page page = BufferGetPage(buffer); BlockNumber blockno = BufferGetBlockNumber(buffer); @@ -540,6 +553,8 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, htup = (HeapTupleHeader) PageGetItem(page, itemid); tup.t_data = htup; tup.t_len = ItemIdGetLength(itemid); + HeapTupleCopyXidsFromPage(buffer, &tup, page, + IsToastRelation(relation)); ItemPointerSet(&tup.t_self, blockno, offnum); prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup, @@ -580,7 +595,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, *off_loc = offnum; /* Process this item or chain of items */ - heap_prune_chain(page, blockno, maxoff, offnum, &prstate); + heap_prune_chain(relation, buffer, page, blockno, maxoff, offnum, &prstate); } /* @@ -614,10 +629,15 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, { ItemId itemid = PageGetItemId(page, offnum); HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid); + tup.t_data = htup; + tup.t_len = ItemIdGetLength(itemid); + HeapTupleCopyXidsFromPage(buffer, &tup, page, + IsToastRelation(relation)); + ItemPointerSet(&tup.t_self, blockno, offnum); if (likely(!HeapTupleHeaderIsHotUpdated(htup))) { - HeapTupleHeaderAdvanceConflictHorizon(htup, + HeapTupleHeaderAdvanceConflictHorizon(&tup, &prstate.latest_xid_removed); heap_prune_record_unused(&prstate, offnum, true); } @@ -636,7 +656,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, } } else - heap_prune_record_unchanged_lp_normal(page, &prstate, offnum); + heap_prune_record_unchanged_lp_normal(relation, buffer, page, &prstate, offnum); } /* We should now have processed every tuple exactly once */ @@ -663,7 +683,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer 
buffer, * pd_prune_xid field or the page was marked full, we will update the hint * bit. */ - do_hint = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid || + do_hint = HeapPageGetPruneXid(page, IsToastRelation(relation)) != prstate.new_prune_xid || PageIsFull(page); /* @@ -727,7 +747,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, * Validate the tuples we will be freezing before entering the * critical section. */ - heap_pre_freeze_checks(buffer, prstate.frozen, prstate.nfrozen); + heap_pre_freeze_checks(relation, buffer, prstate.frozen, prstate.nfrozen); } else if (prstate.nfrozen > 0) { @@ -759,7 +779,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, * Update the page's pd_prune_xid field to either zero, or the lowest * XID of any soon-prunable tuple. */ - ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid; + HeapPageSetPruneXid(page, prstate.new_prune_xid, IsToastRelation(relation)); /* * Also clear the "page is full" flag, since there's no point in @@ -785,11 +805,13 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, heap_page_prune_execute(buffer, false, prstate.redirected, prstate.nredirected, prstate.nowdead, prstate.ndead, - prstate.nowunused, prstate.nunused); + prstate.nowunused, prstate.nunused, + repairFragmentation, + IsToastRelation(relation)); } if (do_freeze) - heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen); + heap_freeze_prepared_tuples(relation, buffer, prstate.frozen, prstate.nfrozen); MarkBufferDirty(buffer); @@ -840,7 +862,8 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer, prstate.frozen, prstate.nfrozen, prstate.redirected, prstate.nredirected, prstate.nowdead, prstate.ndead, - prstate.nowunused, prstate.nunused); + prstate.nowunused, prstate.nunused, + repairFragmentation); } } @@ -996,7 +1019,7 @@ htsv_get_valid_status(int status) * based on that outcome. 
*/ static void -heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, +heap_prune_chain(Relation relation, Buffer buffer, Page page, BlockNumber blockno, OffsetNumber maxoff, OffsetNumber rootoffnum, PruneState *prstate) { TransactionId priorXmax = InvalidTransactionId; @@ -1010,6 +1033,9 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, */ int ndeadchain = 0, nchain = 0; + HeapTupleData tup; + + tup.t_tableOid = RelationGetRelid(relation); rootlp = PageGetItemId(page, rootoffnum); @@ -1065,11 +1091,17 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, htup = (HeapTupleHeader) PageGetItem(page, lp); + tup.t_data = htup; + tup.t_len = ItemIdGetLength(lp); + HeapTupleCopyXidsFromPage(buffer, &tup, page, + IsToastRelation(relation)); + ItemPointerSet(&(tup.t_self), BufferGetBlockNumber(buffer), offnum); + /* * Check the tuple XMIN against prior XMAX, if any */ if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax)) + !TransactionIdEquals(HeapTupleGetXmin(&tup), priorXmax)) break; /* @@ -1083,7 +1115,7 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, /* Remember the last DEAD tuple seen */ ndeadchain = nchain; - HeapTupleHeaderAdvanceConflictHorizon(htup, + HeapTupleHeaderAdvanceConflictHorizon(&tup, &prstate->latest_xid_removed); /* Advance to next chain member */ break; @@ -1133,7 +1165,7 @@ heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff, */ Assert(ItemPointerGetBlockNumber(&htup->t_ctid) == blockno); offnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + priorXmax = HeapTupleGetUpdateXidAny(&tup); } if (ItemIdIsRedirected(rootlp) && nchain < 2) @@ -1166,7 +1198,7 @@ process_chain: i++; } for (; i < nchain; i++) - heap_prune_record_unchanged_lp_normal(page, prstate, chainitems[i]); + heap_prune_record_unchanged_lp_normal(relation, buffer, page, prstate, 
chainitems[i]); } else if (ndeadchain == nchain) { @@ -1192,7 +1224,7 @@ process_chain: /* the rest of tuples in the chain are normal, unchanged tuples */ for (int i = ndeadchain; i < nchain; i++) - heap_prune_record_unchanged_lp_normal(page, prstate, chainitems[i]); + heap_prune_record_unchanged_lp_normal(relation, buffer, page, prstate, chainitems[i]); } } @@ -1327,9 +1359,9 @@ heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumb * update bookkeeping of tuple counts and page visibility. */ static void -heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum) +heap_prune_record_unchanged_lp_normal(Relation relation, Buffer buffer, Page page, PruneState *prstate, OffsetNumber offnum) { - HeapTupleHeader htup; + HeapTupleData tup; Assert(!prstate->processed[offnum]); prstate->processed[offnum] = true; @@ -1356,7 +1388,9 @@ heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumb * will violate this optimistic assumption, but the overall impact of that * should be negligible.) */ - htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, offnum)); + tup.t_data = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, offnum)); + HeapTupleCopyXidsFromPage(buffer, &tup, page, + IsToastRelation(relation)); switch (prstate->htsv[offnum]) { @@ -1378,9 +1412,7 @@ heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumb */ if (prstate->all_visible) { - TransactionId xmin; - - if (!HeapTupleHeaderXminCommitted(htup)) + if (!HeapTupleHeaderXminCommitted(tup.t_data)) { prstate->all_visible = false; break; @@ -1393,7 +1425,6 @@ heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumb * there is a snapshot that considers this xid to still be * running, and if so, we don't consider the page all-visible. 
*/ - xmin = HeapTupleHeaderGetXmin(htup); /* * For now always use prstate->cutoffs for this test, because @@ -1402,16 +1433,16 @@ heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumb * non-freezing caller wanted to set the VM bit. */ Assert(prstate->cutoffs); - if (!TransactionIdPrecedes(xmin, prstate->cutoffs->OldestXmin)) + if (!TransactionIdPrecedes(tup.t_xmin, prstate->cutoffs->OldestXmin)) { prstate->all_visible = false; break; } /* Track newest xmin on page. */ - if (TransactionIdFollows(xmin, prstate->visibility_cutoff_xid) && - TransactionIdIsNormal(xmin)) - prstate->visibility_cutoff_xid = xmin; + if (TransactionIdFollows(tup.t_xmin, prstate->visibility_cutoff_xid) && + TransactionIdIsNormal(tup.t_xmin)) + prstate->visibility_cutoff_xid = tup.t_xmin; } break; @@ -1424,7 +1455,7 @@ heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumb * that the page is reconsidered for pruning in future. */ heap_prune_record_prunable(prstate, - HeapTupleHeaderGetUpdateXid(htup)); + HeapTupleGetUpdateXidAny(&tup)); break; case HEAPTUPLE_INSERT_IN_PROGRESS: @@ -1461,7 +1492,7 @@ heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumb * the page is reconsidered for pruning in future. 
*/ heap_prune_record_prunable(prstate, - HeapTupleHeaderGetUpdateXid(htup)); + HeapTupleGetUpdateXidAny(&tup)); break; default: @@ -1480,7 +1511,7 @@ heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumb { bool totally_frozen; - if ((heap_prepare_freeze_tuple(htup, + if ((heap_prepare_freeze_tuple(&tup, prstate->cutoffs, &prstate->pagefrz, &prstate->frozen[prstate->nfrozen], @@ -1561,7 +1592,9 @@ void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, - OffsetNumber *nowunused, int nunused) + OffsetNumber *nowunused, int nunused, + bool repairFragmentation, + bool is_toast) { Page page = (Page) BufferGetPage(buffer); OffsetNumber *offnum; @@ -1709,7 +1742,8 @@ heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, * Finally, repair any fragmentation, and update the page's hint bit * about whether it has free pointers. */ - PageRepairFragmentation(page); + if (repairFragmentation) + PageRepairFragmentation(page, is_toast); /* * Now that the page has been modified, assert that redirect items @@ -1782,7 +1816,8 @@ page_verify_redirects(Page page) * and reused by a completely unrelated tuple. 
*/ void -heap_get_root_tuples(Page page, OffsetNumber *root_offsets) +heap_get_root_tuples(Relation relation, Buffer buffer, Page page, + OffsetNumber *root_offsets) { OffsetNumber offnum, maxoff; @@ -1797,6 +1832,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) HeapTupleHeader htup; OffsetNumber nextoffnum; TransactionId priorXmax; + HeapTupleData tup; /* skip unused and dead items */ if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp)) @@ -1805,6 +1841,9 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) if (ItemIdIsNormal(lp)) { htup = (HeapTupleHeader) PageGetItem(page, lp); + tup.t_data = htup; + HeapTupleCopyXidsFromPage(buffer, &tup, page, + IsToastRelation(relation)); /* * Check if this tuple is part of a HOT-chain rooted at some other @@ -1826,7 +1865,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) /* Set up to scan the HOT-chain */ nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + priorXmax = HeapTupleGetUpdateXidAny(&tup); } else { @@ -1865,9 +1904,12 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) break; htup = (HeapTupleHeader) PageGetItem(page, lp); + tup.t_data = htup; + HeapTupleCopyXidsFromPage(buffer, &tup, page, + IsToastRelation(relation)); if (TransactionIdIsValid(priorXmax) && - !TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup))) + !TransactionIdEquals(priorXmax, HeapTupleGetXmin(&tup))) break; /* Remember the root line pointer for this item */ @@ -1881,7 +1923,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) Assert(!HeapTupleHeaderIndicatesMovedPartitions(htup)); nextoffnum = ItemPointerGetOffsetNumber(&htup->t_ctid); - priorXmax = HeapTupleHeaderGetUpdateXid(htup); + priorXmax = HeapTupleGetUpdateXidAny(&tup); } } } @@ -2057,7 +2099,8 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, - 
OffsetNumber *unused, int nunused) + OffsetNumber *unused, int nunused, + bool repairFragmentation) { xl_heap_prune xlrec; XLogRecPtr recptr; @@ -2073,6 +2116,12 @@ log_heap_prune_and_freeze(Relation relation, Buffer buffer, xlrec.flags = 0; + if (IsToastRelation(relation)) + xlrec.flags |= XLHP_ON_TOAST_RELATION; + + if (repairFragmentation) + xlrec.flags |= XLHP_REPAIR_FRAGMENTATION; + /* * Prepare data for the buffer. The arrays are not actually in the * buffer, but we pretend that they are. When XLogInsert stores a full diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index e6d2b5fced1..67180c186d6 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -359,6 +359,7 @@ rewrite_heap_tuple(RewriteState state, &old_tuple->t_data->t_choice.t_heap, sizeof(HeapTupleFields)); + HeapTupleCopyXids(new_tuple, old_tuple); new_tuple->t_data->t_infomask &= ~HEAP_XACT_MASK; new_tuple->t_data->t_infomask2 &= ~HEAP2_XACT_MASK; new_tuple->t_data->t_infomask |= @@ -368,7 +369,7 @@ rewrite_heap_tuple(RewriteState state, * While we have our hands on the tuple, we may as well freeze any * eligible xmin or xmax, so that future VACUUM effort can be saved. */ - heap_freeze_tuple(new_tuple->t_data, + heap_freeze_tuple(new_tuple, state->rs_old_rel->rd_rel->relfrozenxid, state->rs_old_rel->rd_rel->relminmxid, state->rs_freeze_xid, @@ -384,7 +385,7 @@ rewrite_heap_tuple(RewriteState state, * If the tuple has been updated, check the old-to-new mapping hash table. 
*/ if (!((old_tuple->t_data->t_infomask & HEAP_XMAX_INVALID) || - HeapTupleHeaderIsOnlyLocked(old_tuple->t_data)) && + HeapTupleIsOnlyLocked(old_tuple)) && !HeapTupleHeaderIndicatesMovedPartitions(old_tuple->t_data) && !(ItemPointerEquals(&(old_tuple->t_self), &(old_tuple->t_data->t_ctid)))) @@ -392,7 +393,7 @@ rewrite_heap_tuple(RewriteState state, OldToNewMapping mapping; memset(&hashkey, 0, sizeof(hashkey)); - hashkey.xmin = HeapTupleHeaderGetUpdateXid(old_tuple->t_data); + hashkey.xmin = HeapTupleGetUpdateXidAny(old_tuple); hashkey.tid = old_tuple->t_data->t_ctid; mapping = (OldToNewMapping) @@ -465,7 +466,7 @@ rewrite_heap_tuple(RewriteState state, * RECENTLY_DEAD if and only if the xmin is not before OldestXmin. */ if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(new_tuple->t_data), + !TransactionIdPrecedes(HeapTupleGetXmin(new_tuple), state->rs_oldest_xmin)) { /* @@ -474,7 +475,7 @@ rewrite_heap_tuple(RewriteState state, UnresolvedTup unresolved; memset(&hashkey, 0, sizeof(hashkey)); - hashkey.xmin = HeapTupleHeaderGetXmin(new_tuple->t_data); + hashkey.xmin = HeapTupleGetXmin(new_tuple); hashkey.tid = old_tid; unresolved = hash_search(state->rs_unresolved_tups, &hashkey, @@ -562,7 +563,7 @@ rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple) bool found; memset(&hashkey, 0, sizeof(hashkey)); - hashkey.xmin = HeapTupleHeaderGetXmin(old_tuple->t_data); + hashkey.xmin = HeapTupleGetXmin(old_tuple); hashkey.tid = old_tuple->t_self; unresolved = hash_search(state->rs_unresolved_tups, &hashkey, @@ -598,6 +599,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup) Size len; OffsetNumber newoff; HeapTuple heaptup; + TransactionId xmin; + bool immutable_tuple; /* * If the new tuple is too big for storage or contains already toasted @@ -632,9 +635,19 @@ raw_heap_insert(RewriteState state, HeapTuple tup) len = MAXALIGN(heaptup->t_len); /* be conservative */ /* - * If we're gonna fail for oversize tuple, 
do it right away + * Due to the update to 64-bit xids, the maximum plain tuple size was decreased due to adding + * PageSpecial to a heap page. Pages with a tuple that became too large to fit + * should remain in Double Xmax format (read only). Inserting plain tuples with + * size over the new MaxHeapTupleSize is prohibited anyway, but VACUUM FULL will + * transfer this page to the rebuilt relation unmodified. */ - if (len > MaxHeapTupleSize) + immutable_tuple = len <= MaxHeapTupleSize_32 && len > MaxHeapTupleSize; + + /* + * If we're gonna fail for oversize tuple, do it right away. But allow processing of + * immutable_tuple (see above). + */ + if (len > MaxHeapTupleSize && !immutable_tuple) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("row is too big: size %zu, maximum size %zu", @@ -669,9 +682,41 @@ raw_heap_insert(RewriteState state, HeapTuple tup) /* Initialize a new empty page */ state->rs_buffer = smgr_bulk_get_buf(state->rs_bulkstate); page = (Page) state->rs_buffer; - PageInit(page, BLCKSZ, 0); + if (immutable_tuple) + /* Initialize DoubleXmax page */ + PageInit(page, BLCKSZ, 0); + else + { + Size special_size; + + special_size = IsToastRelation(state->rs_new_rel) ? + sizeof(ToastPageSpecialData) : + sizeof(HeapPageSpecialData); + PageInit(page, BLCKSZ, special_size); + } } + rewrite_page_prepare_for_xid(page, heaptup, + IsToastRelation(state->rs_new_rel)); + + /* + * A tuple with HEAP_XMIN_FROZEN in t_infomask should have xmin set + * to FrozenTransactionId to avoid these tuples being treated as normal. + */ + xmin = HeapTupleGetXmin(heaptup); + HeapTupleSetXmin(heaptup, xmin); + + /* + * Tuples on a DoubleXmax page cannot appear modified after they have been + * frozen by pg_upgrade. Just check this to be safe.
+ */ + Assert(!immutable_tuple || xmin == FrozenTransactionId); + + if (!immutable_tuple) + HeapTupleAndHeaderSetXmin(page, heaptup, xmin, false); + + HeapTupleHeaderStoreXmax(page, heaptup, false); + /* And now we can insert the tuple into the page */ newoff = PageAddItem(page, (Item) heaptup->t_data, heaptup->t_len, InvalidOffsetNumber, false, true); @@ -952,19 +997,24 @@ logical_rewrite_log_mapping(RewriteState state, TransactionId xid, */ if (!found) { - char path[MAXPGPATH]; - Oid dboid; + char path[MAXPGPATH]; + Oid dboid; + TransactionId current_xid; if (state->rs_old_rel->rd_rel->relisshared) dboid = InvalidOid; else dboid = MyDatabaseId; + current_xid = GetCurrentTransactionId(); snprintf(path, MAXPGPATH, "%s/" LOGICAL_REWRITE_FORMAT, PG_LOGICAL_MAPPINGS_DIR, dboid, relid, LSN_FORMAT_ARGS(state->rs_begin_lsn), - xid, GetCurrentTransactionId()); + (uint32) (xid >> 32), + (uint32) xid, + (uint32) (current_xid >> 32), + (uint32) current_xid); dclist_init(&src->mappings); src->off = 0; @@ -1011,9 +1061,9 @@ logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid, if (!state->rs_logical_rewrite) return; - xmin = HeapTupleHeaderGetXmin(new_tuple->t_data); + xmin = HeapTupleGetXmin(new_tuple); /* use *GetUpdateXid to correctly deal with multixacts */ - xmax = HeapTupleHeaderGetUpdateXid(new_tuple->t_data); + xmax = HeapTupleGetUpdateXidAny(new_tuple); /* * Log the mapping iff the tuple has been created recently. 
@@ -1077,14 +1127,19 @@ heap_xlog_logical_rewrite(XLogReaderState *r) xl_heap_rewrite_mapping *xlrec; uint32 len; char *data; + TransactionId xid; xlrec = (xl_heap_rewrite_mapping *) XLogRecGetData(r); + xid = XLogRecGetXid(r); snprintf(path, MAXPGPATH, "%s/" LOGICAL_REWRITE_FORMAT, PG_LOGICAL_MAPPINGS_DIR, xlrec->mapped_db, xlrec->mapped_rel, LSN_FORMAT_ARGS(xlrec->start_lsn), - xlrec->mapped_xid, XLogRecGetXid(r)); + (uint32) (xlrec->mapped_xid >> 32), + (uint32) xlrec->mapped_xid, + (uint32) (xid >> 32), + (uint32) xid); fd = OpenTransientFile(path, O_CREAT | O_WRONLY | PG_BINARY); @@ -1179,10 +1234,12 @@ CheckPointLogicalRewriteHeap(void) Oid dboid; Oid relid; XLogRecPtr lsn; - TransactionId rewrite_xid; - TransactionId create_xid; - uint32 hi, - lo; + uint32 lsn_hi, + lsn_lo, + rewrite_xid_hi, + rewrite_xid_lo, + create_xid_hi, + create_xid_lo; PGFileType de_type; if (strcmp(mapping_de->d_name, ".") == 0 || @@ -1200,10 +1257,12 @@ CheckPointLogicalRewriteHeap(void) continue; if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, - &dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6) + &dboid, &relid, &lsn_hi, &lsn_lo, + &rewrite_xid_hi, &rewrite_xid_lo, + &create_xid_hi, &create_xid_lo) != 8) elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name); - lsn = ((uint64) hi) << 32 | lo; + lsn = ((uint64) lsn_hi) << 32 | lsn_lo; if (lsn < cutoff || cutoff == InvalidXLogRecPtr) { diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 0df5aee54fc..1264ad7b201 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -139,6 +139,7 @@ #include "access/transam.h" #include "access/visibilitymap.h" #include "access/xloginsert.h" +#include "catalog/catalog.h" #include "catalog/storage.h" #include "commands/dbcommands.h" #include "commands/progress.h" @@ -444,7 +445,6 @@ static void lazy_vacuum_heap_rel(LVRelState *vacrel); static void lazy_vacuum_heap_page(LVRelState *vacrel, 
BlockNumber blkno, Buffer buffer, OffsetNumber *deadoffsets, int num_offsets, Buffer vmbuffer); -static bool lazy_check_wraparound_failsafe(LVRelState *vacrel); static void lazy_cleanup_all_indexes(LVRelState *vacrel); static IndexBulkDeleteResult *lazy_vacuum_one_index(Relation indrel, IndexBulkDeleteResult *istat, @@ -828,7 +828,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * ensure that parallel VACUUM won't be attempted at all when relfrozenxid * is already dangerously old.) */ - lazy_check_wraparound_failsafe(vacrel); dead_items_alloc(vacrel, params->nworkers); /* @@ -956,7 +955,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, BufferUsage bufferusage; StringInfoData buf; char *msgfmt; - int32 diff; + int64 diff; double read_rate = 0, write_rate = 0; int64 total_blks_hit; @@ -1020,16 +1019,17 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, orig_rel_pages, vacrel->eager_scanned_pages); appendStringInfo(&buf, - _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"), + _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable, oldest xmin: %llu\n"), (long long) vacrel->tuples_deleted, (long long) vacrel->new_rel_tuples, - (long long) vacrel->recently_dead_tuples); + (long long) vacrel->recently_dead_tuples, + (unsigned long long) vacrel->cutoffs.OldestXmin); if (vacrel->missed_dead_tuples > 0) appendStringInfo(&buf, _("tuples missed: %lld dead from %u pages not removed due to cleanup lock contention\n"), (long long) vacrel->missed_dead_tuples, vacrel->missed_dead_pages); - diff = (int32) (ReadNextTransactionId() - + diff = (int64) (ReadNextTransactionId() - vacrel->cutoffs.OldestXmin); appendStringInfo(&buf, _("removable cutoff: %llu, which was %lld XIDs old when operation ended\n"), @@ -1037,7 +1037,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, (long long) diff); if (frozenxid_updated) { - diff = (int32) (vacrel->NewRelfrozenXid - + diff = (int64) (vacrel->NewRelfrozenXid - 
vacrel->cutoffs.relfrozenxid); appendStringInfo(&buf, _("new relfrozenxid: %llu, which is %lld XIDs ahead of previous value\n"), @@ -1046,7 +1046,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, } if (minmulti_updated) { - diff = (int32) (vacrel->NewRelminMxid - + diff = (int64) (vacrel->NewRelminMxid - vacrel->cutoffs.relminmxid); appendStringInfo(&buf, _("new relminmxid: %llu, which is %lld MXIDs ahead of previous value\n"), @@ -1249,19 +1249,6 @@ lazy_scan_heap(LVRelState *vacrel) vacuum_delay_point(false); - /* - * Regularly check if wraparound failsafe should trigger. - * - * There is a similar check inside lazy_vacuum_all_indexes(), but - * relfrozenxid might start to look dangerously old before we reach - * that point. This check also provides failsafe coverage for the - * one-pass strategy, and the two-pass strategy with the index_cleanup - * param set to 'off'. - */ - if (vacrel->scanned_pages > 0 && - vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0) - lazy_check_wraparound_failsafe(vacrel); - /* * Consider if we definitely have enough space to process TIDs on page * already. If we are close to overrunning the available space for @@ -1827,7 +1814,14 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, if (GetRecordedFreeSpace(vacrel->rel, blkno) == 0) { - freespace = BLCKSZ - SizeOfPageHeaderData; + Size special_size; + + special_size = IsToastRelation(vacrel->rel) ? 
+ sizeof(ToastPageSpecialData) : + sizeof(HeapPageSpecialData); + freespace = BufferGetPageSize(buf) + - SizeOfPageHeaderData + - special_size; RecordPageWithFreeSpace(vacrel->rel, blkno, freespace); } @@ -1977,7 +1971,8 @@ lazy_scan_prune(LVRelState *vacrel, heap_page_prune_and_freeze(rel, buf, vacrel->vistest, prune_options, &vacrel->cutoffs, &presult, PRUNE_VACUUM_SCAN, &vacrel->offnum, - &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid); + &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid, + true); Assert(MultiXactIdIsValid(vacrel->NewRelminMxid)); Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid)); @@ -2246,7 +2241,6 @@ lazy_scan_noprune(LVRelState *vacrel, recently_dead_tuples, missed_dead_tuples; bool hastup; - HeapTupleHeader tupleheader; TransactionId NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid; MultiXactId NoFreezePageRelminMxid = vacrel->NewRelminMxid; OffsetNumber deadoffsets[MaxHeapTuplesPerPage]; @@ -2291,8 +2285,13 @@ lazy_scan_noprune(LVRelState *vacrel, } hastup = true; /* page prevents rel truncation */ - tupleheader = (HeapTupleHeader) PageGetItem(page, itemid); - if (heap_tuple_should_freeze(tupleheader, &vacrel->cutoffs, + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(vacrel->rel); + HeapTupleCopyXidsFromPage(buf, &tuple, page, + IsToastRelation(vacrel->rel)); + ItemPointerSet(&(tuple.t_self), blkno, offnum); + if (heap_tuple_should_freeze(&tuple, &vacrel->cutoffs, &NoFreezePageRelfrozenXid, &NoFreezePageRelminMxid)) { @@ -2328,6 +2327,8 @@ lazy_scan_noprune(LVRelState *vacrel, tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(vacrel->rel); + HeapTupleCopyXidsFromPage(buf, &tuple, page, + IsToastRelation(vacrel->rel)); switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin, buf)) @@ -2589,13 +2590,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) 
Assert(vacrel->do_index_vacuuming); Assert(vacrel->do_index_cleanup); - /* Precheck for XID wraparound emergencies */ - if (lazy_check_wraparound_failsafe(vacrel)) - { - /* Wraparound emergency -- don't even start an index scan */ - return false; - } - /* * Report that we are now vacuuming indexes and the number of indexes to * vacuum. @@ -2619,12 +2613,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) pgstat_progress_update_param(PROGRESS_VACUUM_INDEXES_PROCESSED, idx + 1); - if (lazy_check_wraparound_failsafe(vacrel)) - { - /* Wraparound emergency -- end current index scan */ - allindexes = false; - break; - } } } else @@ -2632,13 +2620,6 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) /* Outsource everything to parallel variant */ parallel_vacuum_bulkdel_all_indexes(vacrel->pvs, old_live_tuples, vacrel->num_index_scans); - - /* - * Do a postcheck to consider applying wraparound failsafe now. Note - * that parallel VACUUM only gets the precheck and this postcheck. - */ - if (lazy_check_wraparound_failsafe(vacrel)) - allindexes = false; } /* @@ -2876,7 +2857,8 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, NULL, 0, /* frozen */ NULL, 0, /* redirected */ NULL, 0, /* dead */ - unused, nunused); + unused, nunused, + true); /*repair fragmentation*/ } /* @@ -2932,68 +2914,6 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, restore_vacuum_error_info(vacrel, &saved_err_info); } -/* - * Trigger the failsafe to avoid wraparound failure when vacrel table has a - * relfrozenxid and/or relminmxid that is dangerously far in the past. - * Triggering the failsafe makes the ongoing VACUUM bypass any further index - * vacuuming and heap vacuuming. Truncating the heap is also bypassed. - * - * Any remaining work (work that VACUUM cannot just bypass) is typically sped - * up when the failsafe triggers. VACUUM stops applying any cost-based delay - * that it started out with. 
- * - * Returns true when failsafe has been triggered. - */ -static bool -lazy_check_wraparound_failsafe(LVRelState *vacrel) -{ - /* Don't warn more than once per VACUUM */ - if (VacuumFailsafeActive) - return true; - - if (unlikely(vacuum_xid_failsafe_check(&vacrel->cutoffs))) - { - const int progress_index[] = { - PROGRESS_VACUUM_INDEXES_TOTAL, - PROGRESS_VACUUM_INDEXES_PROCESSED - }; - int64 progress_val[2] = {0, 0}; - - VacuumFailsafeActive = true; - - /* - * Abandon use of a buffer access strategy to allow use of all of - * shared buffers. We assume the caller who allocated the memory for - * the BufferAccessStrategy will free it. - */ - vacrel->bstrategy = NULL; - - /* Disable index vacuuming, index cleanup, and heap rel truncation */ - vacrel->do_index_vacuuming = false; - vacrel->do_index_cleanup = false; - vacrel->do_rel_truncate = false; - - /* Reset the progress counters */ - pgstat_progress_update_multi_param(2, progress_index, progress_val); - - ereport(WARNING, - (errmsg("bypassing nonessential maintenance of table \"%s.%s.%s\" as a failsafe after %d index scans", - vacrel->dbname, vacrel->relnamespace, vacrel->relname, - vacrel->num_index_scans), - errdetail("The table's relfrozenxid or relminmxid is too far in the past."), - errhint("Consider increasing configuration parameter \"maintenance_work_mem\" or \"autovacuum_work_mem\".\n" - "You might also need to consider other ways for VACUUM to keep up with the allocation of transaction IDs."))); - - /* Stop applying cost limits from this point on */ - VacuumCostActive = false; - VacuumCostBalance = 0; - - return true; - } - - return false; -} - /* * lazy_cleanup_all_indexes() -- cleanup all indexes of relation. 
*/ @@ -3650,7 +3570,8 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationGetRelid(vacrel->rel); - + HeapTupleCopyXidsFromPage(buf, &tuple, page, + IsToastRelation(vacrel->rel)); switch (HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin, buf)) { @@ -3670,7 +3591,7 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, * The inserter definitely committed. But is it old enough * that everyone sees it as committed? */ - xmin = HeapTupleHeaderGetXmin(tuple.t_data); + xmin = HeapTupleGetXmin(&tuple); if (!TransactionIdPrecedes(xmin, vacrel->cutoffs.OldestXmin)) { @@ -3686,7 +3607,7 @@ heap_page_is_all_visible(LVRelState *vacrel, Buffer buf, /* Check whether this tuple is already frozen or not */ if (all_visible && *all_frozen && - heap_tuple_needs_eventual_freeze(tuple.t_data)) + heap_tuple_needs_eventual_freeze(&tuple)) *all_frozen = false; } break; diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index c79dd38ee18..417fdab8be4 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -549,6 +549,7 @@ _bt_getroot(Relation rel, Relation heaprel, int access) rootblkno = rootopaque->btpo_next; } + /* Note: can't check btpo_level on deleted pages */ if (rootopaque->btpo_level != rootlevel) elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u", rootblkno, RelationGetRelationName(rel), @@ -652,6 +653,7 @@ _bt_gettrueroot(Relation rel) rootblkno = rootopaque->btpo_next; } + /* Note: can't check btpo_level on deleted pages */ if (rootopaque->btpo_level != rootlevel) elog(ERROR, "root page %u of index \"%s\" has level %u, expected %u", rootblkno, RelationGetRelationName(rel), diff --git a/src/backend/access/nbtree/nbtsplitloc.c b/src/backend/access/nbtree/nbtsplitloc.c index e6c9aaa0454..13ecfe324c6 100644 --- 
a/src/backend/access/nbtree/nbtsplitloc.c +++ b/src/backend/access/nbtree/nbtsplitloc.c @@ -140,6 +140,7 @@ _bt_findsplitloc(Relation rel, olddataitemstoleft, perfectpenalty, leaffillfactor; + int maxTupleEnd PG_USED_FOR_ASSERTS_ONLY; FindSplitData state; FindSplitStrat strategy; ItemId itemid; @@ -153,6 +154,7 @@ _bt_findsplitloc(Relation rel, opaque = BTPageGetOpaque(origpage); maxoff = PageGetMaxOffsetNumber(origpage); + maxTupleEnd = ItemIdGetTupleEnd(PageGetItemId(origpage, P_HIKEY)); /* Total free space available on a btree page, after fixed overhead */ leftspace = rightspace = @@ -214,6 +216,18 @@ _bt_findsplitloc(Relation rel, itemid = PageGetItemId(origpage, offnum); itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData); +#ifdef USE_ASSERT_CHECKING + + /* + * Ending of rightmost tuple on a page can be shifted relative to left + * boundary of BTPageOpaqueData due to conversion from EE96, which + * used different BTPageOpaqueData layout. It is only checked in the + * assert below. + */ + if (maxTupleEnd < ItemIdGetTupleEnd(itemid)) + maxTupleEnd = ItemIdGetTupleEnd(itemid); +#endif + /* * When item offset number is not newitemoff, neither side of the * split can be newitem. Record a split after the previous data item @@ -248,7 +262,7 @@ _bt_findsplitloc(Relation rel, * (Though only when it's possible that newitem will end up alone on new * right page.) 
*/ - Assert(olddataitemstoleft == olddataitemstotal); + Assert(olddataitemstoleft + ((PageHeader) origpage)->pd_special - maxTupleEnd == olddataitemstotal); if (newitemoff > maxoff) _bt_recsplitloc(&state, newitemoff, false, olddataitemstotal, 0); diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c index 4ee25159a41..25c6bd56468 100644 --- a/src/backend/access/rmgrdesc/gistdesc.c +++ b/src/backend/access/rmgrdesc/gistdesc.c @@ -28,7 +28,7 @@ out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec) appendStringInfo(buf, "rel %u/%u/%u; blk %u; snapshotConflictHorizon %llu, isCatalogRel %c", xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.relNumber, xlrec->block, - (unsigned long long) U64FromFullTransactionId(xlrec->snapshotConflictHorizon), + (unsigned long long) XidFromFullTransactionId(xlrec->snapshotConflictHorizon), xlrec->isCatalogRel ? 'T' : 'F'); } @@ -52,7 +52,7 @@ static void out_gistxlogPageDelete(StringInfo buf, gistxlogPageDelete *xlrec) { appendStringInfo(buf, "deleteXid %llu; downlink %u", - (unsigned long long) U64FromFullTransactionId(xlrec->deleteXid), + (unsigned long long) XidFromFullTransactionId(xlrec->deleteXid), xlrec->downlinkOffset); } diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c index 30d1e6a0651..66ef74b1ea8 100644 --- a/src/backend/access/rmgrdesc/heapdesc.c +++ b/src/backend/access/rmgrdesc/heapdesc.c @@ -285,7 +285,7 @@ heap2_desc(StringInfo buf, XLogReaderState *record) memcpy(&conflict_xid, rec + SizeOfHeapPrune, sizeof(TransactionId)); /* XXX 64-bit conflict xid? 
- a.alekseev */ - appendStringInfo(buf, "snapshotConflictHorizon: %u", + appendStringInfo(buf, "snapshotConflictHorizon: %lu", conflict_xid); } @@ -392,6 +392,23 @@ heap2_desc(StringInfo buf, XLogReaderState *record) } } +void +heap3_desc(StringInfo buf, XLogReaderState *record) +{ + char *rec = XLogRecGetData(record); + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + + info &= XLOG_HEAP_OPMASK; + if (info == XLOG_HEAP3_BASE_SHIFT) + { + xl_heap_base_shift *xlrec = (xl_heap_base_shift *) rec; + + appendStringInfo(buf, "%s delta %lld ", + xlrec->multi ? "MultiXactId" : "XactId", + (long long) xlrec->delta); + } +} + const char * heap_identify(uint8 info) { @@ -475,3 +492,18 @@ heap2_identify(uint8 info) return id; } + +const char * +heap3_identify(uint8 info) +{ + const char *id = NULL; + + switch (info & ~XLR_INFO_MASK) + { + case XLOG_HEAP3_BASE_SHIFT: + id = "BASE_SHIFT"; + break; + } + + return id; +} diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c index 21f6c460abc..b9d0ea28adf 100644 --- a/src/backend/access/rmgrdesc/nbtdesc.c +++ b/src/backend/access/rmgrdesc/nbtdesc.c @@ -96,7 +96,7 @@ btree_desc(StringInfo buf, XLogReaderState *record) appendStringInfo(buf, "left: %u, right: %u, level: %u, safexid: %llu, ", xlrec->leftsib, xlrec->rightsib, xlrec->level, - (unsigned long long) U64FromFullTransactionId(xlrec->safexid)); + (unsigned long long) XidFromFullTransactionId(xlrec->safexid)); appendStringInfo(buf, "leafleft: %u, leafright: %u, leaftopparent: %u", xlrec->leafleftsib, xlrec->leafrightsib, xlrec->leaftopparent); @@ -116,7 +116,7 @@ btree_desc(StringInfo buf, XLogReaderState *record) appendStringInfo(buf, "rel: %u/%u/%u, snapshotConflictHorizon: %llu, isCatalogRel: %c", xlrec->locator.spcOid, xlrec->locator.dbOid, xlrec->locator.relNumber, - (unsigned long long) U64FromFullTransactionId(xlrec->snapshotConflictHorizon), + (unsigned long long) XidFromFullTransactionId(xlrec->snapshotConflictHorizon), 
xlrec->isCatalogRel ? 'T' : 'F'); break; } diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c index a28d87f6490..8d47579a486 100644 --- a/src/backend/access/rmgrdesc/xactdesc.c +++ b/src/backend/access/rmgrdesc/xactdesc.c @@ -110,7 +110,8 @@ ParseCommitRecord(uint8 info, xl_xact_commit *xlrec, xl_xact_parsed_commit *pars { xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data; - parsed->twophase_xid = xl_twophase->xid; + parsed->twophase_xid = + ((uint64) xl_twophase->xid_hi << 32) | xl_twophase->xid_lo; data += sizeof(xl_xact_twophase); @@ -205,7 +206,8 @@ ParseAbortRecord(uint8 info, xl_xact_abort *xlrec, xl_xact_parsed_abort *parsed) { xl_xact_twophase *xl_twophase = (xl_xact_twophase *) data; - parsed->twophase_xid = xl_twophase->xid; + parsed->twophase_xid = + ((uint64) xl_twophase->xid_hi << 32) | xl_twophase->xid_lo; data += sizeof(xl_xact_twophase); diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index ced13c8a5df..3ac385ed6be 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -75,7 +75,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record) checkpoint->PrevTimeLineID, checkpoint->fullPageWrites ? "true" : "false", get_wal_level_string(checkpoint->wal_level), - (unsigned long long) U64FromFullTransactionId(checkpoint->nextXid), + (unsigned long long) XidFromFullTransactionId(checkpoint->nextXid), checkpoint->nextOid, (unsigned long long) checkpoint->nextMulti, (unsigned long long) checkpoint->nextMultiOffset, diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index f468ceb9307..0d06e7d2a11 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -318,7 +318,7 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids, * sub-XIDs and all of the XIDs for which we're adjusting clog should be * on the same page. Check those conditions, too. 
- if (all_xact_same_page && xid == MyProc->xid && + if (all_xact_same_page && xid == pg_atomic_read_u64(&MyProc->xid) && nsubxids <= THRESHOLD_SUBTRANS_CLOG_OPT && nsubxids == MyProc->subxidStatus.count && (nsubxids == 0 || @@ -1032,24 +1032,11 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid) SimpleLruTruncate(XactCtl, cutoffPage); } - /* * Decide whether a CLOG page number is "older" for truncation purposes. * - * We need to use comparison of TransactionIds here in order to do the right - * thing with wraparound XID arithmetic. However, TransactionIdPrecedes() - * would get weird about permanent xact IDs. So, offset both such that xid1, - * xid2, and xid2 + CLOG_XACTS_PER_PAGE - 1 are all normal XIDs; this offset - * is relevant to page 0 and to the page preceding page 0. - * - * The page containing oldestXact-2^31 is the important edge case. The - * portion of that page equaling or following oldestXact-2^31 is expendable, - * but the portion preceding oldestXact-2^31 is not. When oldestXact-2^31 is - * the first XID of a page and segment, the entire page and segment is - * expendable, and we could truncate the segment. Recognizing that case would - * require making oldestXact, not just the page containing oldestXact, - * available to this callback. The benefit would be rare and small, so we - * don't optimize that edge case. + * With 64-bit XIDs this function is just "<", but we keep it as a function so + * that its call sites remain unchanged from the "vanilla" code. */ static bool CLOGPagePrecedes(int64 page1, int64 page2) diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index 40e75e1afc1..7c36b8cbd02 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -947,25 +947,6 @@ AdvanceOldestCommitTsXid(TransactionId oldestXact) /* * Decide whether a commitTS page number is "older" for truncation purposes. * Analogous to CLOGPagePrecedes(). 
- * - * At default BLCKSZ, (1 << 31) % COMMIT_TS_XACTS_PER_PAGE == 128. This - * introduces differences compared to CLOG and the other SLRUs having (1 << - * 31) % per_page == 0. This function never tests exactly - * TransactionIdPrecedes(x-2^31, x). When the system reaches xidStopLimit, - * there are two possible counts of page boundaries between oldestXact and the - * latest XID assigned, depending on whether oldestXact is within the first - * 128 entries of its page. Since this function doesn't know the location of - * oldestXact within page2, it returns false for one page that actually is - * expendable. This is a wider (yet still negligible) version of the - * truncation opportunity that CLOGPagePrecedes() cannot recognize. - * - * For the sake of a worked example, number entries with decimal values such - * that page1==1 entries range from 1.0 to 1.999. Let N+0.15 be the number of - * pages that 2^31 entries will span (N is an integer). If oldestXact=N+2.1, - * then the final safe XID assignment leaves newestXact=1.95. We keep page 2, - * because entry=2.85 is the border that toggles whether entries precede the - * last entry of the oldestXact page. While page 2 is expendable at - * oldestXact=N+2.1, it would be precious at oldestXact=N+2.9. */ static bool CommitTsPagePrecedes(int64 page1, int64 page2) diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 1bf12d6279e..07155818788 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -436,6 +436,9 @@ MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1, /* MultiXactIdSetOldestMember() must have been called already. */ Assert(MultiXactIdIsValid(OldestMemberMXactId[MyProcNumber])); + /* memset members array because with 64-bit xids it has a padding hole */ + MemSet(members, 0, sizeof(members)); + /* * Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs * are still running. 
In typical usage, xid2 will be our own XID and the @@ -551,7 +554,7 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status) * end of the loop. */ newMembers = (MultiXactMember *) - palloc(sizeof(MultiXactMember) * (nmembers + 1)); + palloc0(sizeof(MultiXactMember) * (nmembers + 1)); for (i = 0, j = 0; i < nmembers; i++) { @@ -955,8 +958,8 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, for (i = 0; i < nmembers; i++, offset++) { TransactionId *memberptr; - uint32 *flagsptr; - uint32 flagsval; + uint64 *flagsptr; + uint64 flagsval; int bshift; int flagsoff; int memberoff; @@ -993,12 +996,12 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset, *memberptr = members[i].xid; - flagsptr = (uint32 *) + flagsptr = (uint64 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); flagsval = *flagsptr; - flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift); - flagsval |= (members[i].status << bshift); + flagsval &= ~((uint64) ((1ULL << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift); + flagsval |= ((uint64) members[i].status << bshift); *flagsptr = flagsval; MultiXactMemberCtl->shared->page_dirty[slotno] = true; @@ -1117,8 +1120,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) /* complain even if that DB has disappeared */ if (oldest_datname) ereport(WARNING, - (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is used", - "database \"%s\" must be vacuumed before %u more MultiXactIds are used", + (errmsg_plural("database \"%s\" must be vacuumed before %lu more MultiXactId is used", + "database \"%s\" must be vacuumed before %lu more MultiXactIds are used", multiWrapLimit - result, oldest_datname, multiWrapLimit - result), @@ -1126,8 +1129,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset) "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); else ereport(WARNING, - (errmsg_plural("database with OID %u 
must be vacuumed before %u more MultiXactId is used", - "database with OID %u must be vacuumed before %u more MultiXactIds are used", + (errmsg_plural("database with OID %u must be vacuumed before %lu more MultiXactId is used", + "database with OID %u must be vacuumed before %lu more MultiXactIds are used", multiWrapLimit - result, oldest_datoid, multiWrapLimit - result), @@ -1357,7 +1360,10 @@ retry: offptr += entryno; offset = *offptr; - Assert(offset != 0); + if (offset == 0) + ereport(ERROR, + (errmsg("found invalid zero offset in multixact %llu", + (unsigned long long) multi))); /* * Use the same increment rule as GetNewMultiXactId(), that is, don't @@ -1432,14 +1438,14 @@ retry: if (slept) ConditionVariableCancelSleep(); - ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember)); + ptr = (MultiXactMember *) palloc0(length * sizeof(MultiXactMember)); truelength = 0; prev_pageno = -1; for (int i = 0; i < length; i++, offset++) { TransactionId *xactptr; - uint32 *flagsptr; + uint64 *flagsptr; int flagsoff; int bshift; int memberoff; @@ -1481,7 +1487,7 @@ retry: flagsoff = MXOffsetToFlagsOffset(offset); bshift = MXOffsetToFlagsBitShift(offset); - flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); + flagsptr = (uint64 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff); ptr[truelength].xid = *xactptr; ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK; @@ -2363,7 +2369,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, /* Log the info */ ereport(DEBUG1, - (errmsg_internal("MultiXactId wrap limit is %u, limited by database with OID %u", + (errmsg_internal("MultiXactId wrap limit is %lu, limited by database with OID %u", multiWrapLimit, oldest_datoid))); /* @@ -2413,8 +2419,8 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, if (oldest_datname) ereport(WARNING, - (errmsg_plural("database \"%s\" must be vacuumed before %u more MultiXactId is 
used", - "database \"%s\" must be vacuumed before %u more MultiXactIds are used", + (errmsg_plural("database \"%s\" must be vacuumed before %lu more MultiXactId is used", + "database \"%s\" must be vacuumed before %lu more MultiXactIds are used", multiWrapLimit - curMulti, oldest_datname, multiWrapLimit - curMulti), @@ -2422,8 +2428,8 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); else ereport(WARNING, - (errmsg_plural("database with OID %u must be vacuumed before %u more MultiXactId is used", - "database with OID %u must be vacuumed before %u more MultiXactIds are used", + (errmsg_plural("database with OID %u must be vacuumed before %lu more MultiXactId is used", + "database with OID %u must be vacuumed before %lu more MultiXactIds are used", multiWrapLimit - curMulti, oldest_datoid, multiWrapLimit - curMulti), @@ -2529,7 +2535,7 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers) { int flagsoff; int flagsbit; - uint32 difference; + uint64 difference; /* * Only zero when at first entry of a page. @@ -3058,7 +3064,7 @@ MultiXactOffsetPagePrecedes(int64 page1, int64 page2) /* * Decide whether a MultiXactMember page number is "older" for truncation - * purposes. There is no "invalid offset number" so use the numbers verbatim. + * purposes. There is no "invalid offset number" so use the numbers verbatim. 
*/ static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2) @@ -3083,7 +3089,7 @@ MultiXactMemberPagePrecedes(int64 page1, int64 page2) bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2) { - int32 diff = (int32) (multi1 - multi2); + int64 diff = (int64) (multi1 - multi2); return (diff < 0); } @@ -3097,7 +3103,7 @@ MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2) bool MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2) { - int32 diff = (int32) (multi1 - multi2); + int64 diff = (int64) (multi1 - multi2); return (diff <= 0); } diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 170ff9a7e8e..6d07059eed4 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -1637,7 +1637,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) * must not assign. */ lhs = per_page + offset; /* skip first page to avoid non-normal XIDs */ - rhs = lhs + (1U << 31); + rhs = lhs + (1ULL << 63); Assert(TransactionIdPrecedes(lhs, rhs)); Assert(TransactionIdPrecedes(rhs, lhs)); Assert(!TransactionIdPrecedes(lhs - 1, rhs)); @@ -1653,13 +1653,14 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 3 * per_page) / per_page)); Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 2 * per_page) / per_page)); Assert(ctl->PagePrecedes(rhs / per_page, (lhs - 1 * per_page) / per_page) - || (1U << 31) % per_page != 0); /* See CommitTsPagePrecedes() */ + || (1ULL << 63) % per_page != 0); /* See CommitTsPagePrecedes() */ Assert(ctl->PagePrecedes((lhs + 1 * per_page) / per_page, rhs / per_page) - || (1U << 31) % per_page != 0); + || (1ULL << 63) % per_page != 0); Assert(ctl->PagePrecedes((lhs + 2 * per_page) / per_page, rhs / per_page)); Assert(ctl->PagePrecedes((lhs + 3 * per_page) / per_page, rhs / per_page)); Assert(!ctl->PagePrecedes(rhs / per_page, (lhs + per_page) / per_page)); + /* * 
GetNewTransactionId() has assigned the last XID it can safely use, and * that XID is in the *LAST* page of the second segment. We must not @@ -1669,7 +1670,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) newestXact = newestPage * per_page + offset; Assert(newestXact / per_page == newestPage); oldestXact = newestXact + 1; - oldestXact -= 1U << 31; + oldestXact -= 1ULL << 63; oldestPage = oldestXact / per_page; Assert(!SlruMayDeleteSegment(ctl, (newestPage - @@ -1685,7 +1686,7 @@ SlruPagePrecedesTestOffset(SlruCtl ctl, int per_page, uint32 offset) newestXact = newestPage * per_page + offset; Assert(newestXact / per_page == newestPage); oldestXact = newestXact + 1; - oldestXact -= 1U << 31; + oldestXact -= 1ULL << 63; oldestPage = oldestXact / per_page; Assert(!SlruMayDeleteSegment(ctl, (newestPage - diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index 6564412e6b4..3211f28f7bb 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -271,12 +271,17 @@ void BootStrapSUBTRANS(void) { int slotno; - LWLock *lock = SimpleLruGetBankLock(SubTransCtl, 0); + int64 pageno; + LWLock *lock; + + pageno = TransactionIdToPage(XidFromFullTransactionId(TransamVariables->nextXid)); + + lock = SimpleLruGetBankLock(SubTransCtl, 0); LWLockAcquire(lock, LW_EXCLUSIVE); /* Create and zero the first page of the subtrans log */ - slotno = ZeroSUBTRANSPage(0); + slotno = ZeroSUBTRANSPage(pageno); /* Make sure it's written out */ SimpleLruWritePage(SubTransCtl, slotno); @@ -341,9 +346,6 @@ StartupSUBTRANS(TransactionId oldestActiveXID) break; startPage++; - /* must account for wraparound */ - if (startPage > TransactionIdToPage(MaxTransactionId)) - startPage = 0; } LWLockRelease(lock); @@ -421,6 +423,7 @@ TruncateSUBTRANS(TransactionId oldestXact) * a page and oldestXact == next XID. In that case, if we didn't subtract * one, we'd trigger SimpleLruTruncate's wraparound detection. 
*/ + TransactionIdRetreat(oldestXact); cutoffPage = TransactionIdToPage(oldestXact); diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c index 29f89e83c40..344b0628860 100644 --- a/src/backend/access/transam/transam.c +++ b/src/backend/access/transam/transam.c @@ -281,14 +281,14 @@ TransactionIdPrecedes(TransactionId id1, TransactionId id2) { /* * If either ID is a permanent XID then we can just do unsigned - * comparison. If both are normal, do a modulo-2^32 comparison. + * comparison. If both are normal, do a modulo-2^64 comparison. */ - int32 diff; + int64 diff; if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) return (id1 < id2); - diff = (int32) (id1 - id2); + diff = (int64) (id1 - id2); return (diff < 0); } @@ -298,12 +298,12 @@ TransactionIdPrecedes(TransactionId id1, TransactionId id2) bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2) { - int32 diff; + int64 diff; if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) return (id1 <= id2); - diff = (int32) (id1 - id2); + diff = (int64) (id1 - id2); return (diff <= 0); } @@ -313,12 +313,12 @@ TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2) bool TransactionIdFollows(TransactionId id1, TransactionId id2) { - int32 diff; + int64 diff; if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) return (id1 > id2); - diff = (int32) (id1 - id2); + diff = (int64) (id1 - id2); return (diff > 0); } @@ -328,12 +328,12 @@ TransactionIdFollows(TransactionId id1, TransactionId id2) bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2) { - int32 diff; + int64 diff; if (!TransactionIdIsNormal(id1) || !TransactionIdIsNormal(id2)) return (id1 >= id2); - diff = (int32) (id1 - id2); + diff = (int64) (id1 - id2); return (diff >= 0); } diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 3c4e3c3cc25..009638077fd 100644 --- a/src/backend/access/transam/twophase.c 
+++ b/src/backend/access/transam/twophase.c @@ -458,8 +458,8 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid, proc->vxid.lxid = xid; proc->vxid.procNumber = INVALID_PROC_NUMBER; } - proc->xid = xid; - Assert(proc->xmin == InvalidTransactionId); + pg_atomic_write_u64(&proc->xid, xid); + Assert(pg_atomic_read_u64(&proc->xmin) == InvalidTransactionId); proc->delayChkptFlags = 0; proc->statusFlags = 0; proc->pid = 0; @@ -774,7 +774,7 @@ pg_prepared_xact(PG_FUNCTION_ARGS) * Form tuple with appropriate data. */ - values[0] = TransactionIdGetDatum(proc->xid); + values[0] = TransactionIdGetDatum(pg_atomic_read_u64(&proc->xid)); values[1] = CStringGetTextDatum(gxact->gid); values[2] = TimestampTzGetDatum(gxact->prepared_at); values[3] = ObjectIdGetDatum(gxact->owner); @@ -927,30 +927,8 @@ TwoPhaseGetDummyProc(TransactionId xid, bool lock_held) /* State file support */ /************************************************************************/ -/* - * Compute the FullTransactionId for the given TransactionId. - * - * This is safe if the xid has not yet reached COMMIT PREPARED or ROLLBACK - * PREPARED. After those commands, concurrent vac_truncate_clog() may make - * the xid cease to qualify as allowable. XXX Not all callers limit their - * calls accordingly. 
- */ -static inline FullTransactionId -AdjustToFullTransactionId(TransactionId xid) -{ - Assert(TransactionIdIsValid(xid)); - return FullTransactionIdFromAllowableAt(ReadNextFullTransactionId(), xid); -} - -static inline int -TwoPhaseFilePath(char *path, TransactionId xid) -{ - FullTransactionId fxid = AdjustToFullTransactionId(xid); - - return snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X%08X", - EpochFromFullTransactionId(fxid), - XidFromFullTransactionId(fxid)); -} +#define TwoPhaseFilePath(path, xid) \ + snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%016llX", (unsigned long long) (xid)) /* * 2PC state file format: @@ -1899,11 +1877,9 @@ restoreTwoPhaseData(void) strspn(clde->d_name, "0123456789ABCDEF") == 16) { TransactionId xid; - FullTransactionId fxid; char *buf; - fxid = FullTransactionIdFromU64(strtou64(clde->d_name, NULL, 16)); - xid = XidFromFullTransactionId(fxid); + xid = (TransactionId) strtou64(clde->d_name, NULL, 16); buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr, true, false, false); @@ -2234,7 +2210,6 @@ ProcessTwoPhaseBuffer(TransactionId xid, if (fromdisk) { - /* Read and validate file */ buf = ReadTwoPhaseFile(xid, false); } else @@ -2709,7 +2684,7 @@ IsTwoPhaseTransactionGidForSubid(Oid subid, char *gid) char gid_tmp[GIDSIZE]; /* Extract the subid and xid from the given GID */ - ret = sscanf(gid, "pg_gid_%u_%u", &subid_from_gid, &xid_from_gid); + ret = sscanf(gid, "pg_gid_%u_" UINT64_FORMAT, &subid_from_gid, &xid_from_gid); /* * Check that the given GID has expected format, and at least the subid diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index ab66478e5cc..2aa40186890 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -93,9 +93,9 @@ GetNewTransactionId(bool isSubXact) if (IsBootstrapProcessingMode()) { Assert(!isSubXact); - MyProc->xid = BootstrapTransactionId; - ProcGlobal->xids[MyProc->pgxactoff] = BootstrapTransactionId; - return FullTransactionIdFromEpochAndXid(0,
BootstrapTransactionId); + pg_atomic_write_u64(&MyProc->xid, BootstrapTransactionId); + pg_atomic_write_u64(&ProcGlobal->xids[MyProc->pgxactoff], BootstrapTransactionId); + return FullTransactionIdFromXid(BootstrapTransactionId); } /* safety check, we should never get this far in a HS standby */ @@ -107,19 +107,6 @@ GetNewTransactionId(bool isSubXact) full_xid = TransamVariables->nextXid; xid = XidFromFullTransactionId(full_xid); - /*---------- - * Check to see if it's safe to assign another XID. This protects against - * catastrophic data loss due to XID wraparound. The basic rules are: - * - * If we're past xidVacLimit, start trying to force autovacuum cycles. - * If we're past xidWarnLimit, start issuing warnings. - * If we're past xidStopLimit, refuse to execute transactions, unless - * we are running in single-user mode (which gives an escape hatch - * to the DBA who somehow got past the earlier defenses). - * - * Note that this coding also appears in GetNewMultiXactId. - *---------- - */ if (TransactionIdFollowsOrEquals(xid, TransamVariables->xidVacLimit)) { /* @@ -129,11 +116,6 @@ GetNewTransactionId(bool isSubXact) * possibility of deadlock while doing get_database_name(). First, * copy all the shared values we'll need in this path. 
*/ - TransactionId xidWarnLimit = TransamVariables->xidWarnLimit; - TransactionId xidStopLimit = TransamVariables->xidStopLimit; - TransactionId xidWrapLimit = TransamVariables->xidWrapLimit; - Oid oldest_datoid = TransamVariables->oldestXidDB; - LWLockRelease(XidGenLock); /* @@ -144,48 +126,6 @@ GetNewTransactionId(bool isSubXact) if (IsUnderPostmaster && (xid % 65536) == 0) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - if (IsUnderPostmaster && - TransactionIdFollowsOrEquals(xid, xidStopLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands that assign new transaction IDs to avoid wraparound data loss in database \"%s\"", - oldest_datname), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("database is not accepting commands that assign new transaction IDs to avoid wraparound data loss in database with OID %u", - oldest_datoid), - errhint("Execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } - else if (TransactionIdFollowsOrEquals(xid, xidWarnLimit)) - { - char *oldest_datname = get_database_name(oldest_datoid); - - /* complain even if that DB has disappeared */ - if (oldest_datname) - ereport(WARNING, - (errmsg("database \"%s\" must be vacuumed within %llu transactions", - oldest_datname, - (unsigned long long) xidWrapLimit - xid), - errhint("To avoid transaction ID assignment failures, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - 
else - ereport(WARNING, - (errmsg("database with OID %u must be vacuumed within %llu transactions", - oldest_datoid, - (unsigned long long) xidWrapLimit - xid), - errhint("To avoid XID assignment failures, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } - /* Re-acquire lock and start over */ LWLockAcquire(XidGenLock, LW_EXCLUSIVE); full_xid = TransamVariables->nextXid; @@ -255,8 +195,8 @@ GetNewTransactionId(bool isSubXact) Assert(!MyProc->subxidStatus.overflowed); /* LWLockRelease acts as barrier */ - MyProc->xid = xid; - ProcGlobal->xids[MyProc->pgxactoff] = xid; + pg_atomic_write_u64(&MyProc->xid, xid); + pg_atomic_write_u64(&ProcGlobal->xids[MyProc->pgxactoff], xid); } else { @@ -297,7 +237,7 @@ ReadNextFullTransactionId(void) } /* - * Advance nextXid to the value after a given xid. The epoch is inferred. + * Advance nextXid to the value after a given xid. * This must only be called during recovery or from two-phase start-up code. */ void @@ -305,7 +245,6 @@ AdvanceNextFullTransactionIdPastXid(TransactionId xid) { FullTransactionId newNextFullXid; TransactionId next_xid; - uint32 epoch; /* * It is safe to read nextXid without a lock, because this is only called @@ -319,19 +258,9 @@ AdvanceNextFullTransactionIdPastXid(TransactionId xid) if (!TransactionIdFollowsOrEquals(xid, next_xid)) return; - /* - * Compute the FullTransactionId that comes after the given xid. To do - * this, we preserve the existing epoch, but detect when we've wrapped - * into a new epoch. This is necessary because WAL records and 2PC state - * currently contain 32 bit xids. The wrap logic is safe in those cases - * because the span of active xids cannot exceed one epoch at any given - * point in the WAL stream. - */ + /* Compute the FullTransactionId that comes after the given xid. 
*/ TransactionIdAdvance(xid); - epoch = EpochFromFullTransactionId(TransamVariables->nextXid); - if (unlikely(xid < next_xid)) - ++epoch; - newNextFullXid = FullTransactionIdFromEpochAndXid(epoch, xid); + newNextFullXid = FullTransactionIdFromXid(xid); /* * We still need to take a lock to modify the value when there are @@ -372,61 +301,14 @@ void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) { TransactionId xidVacLimit; - TransactionId xidWarnLimit; - TransactionId xidStopLimit; - TransactionId xidWrapLimit; TransactionId curXid; Assert(TransactionIdIsNormal(oldest_datfrozenxid)); - /* - * The place where we actually get into deep trouble is halfway around - * from the oldest potentially-existing XID. (This calculation is - * probably off by one or two counts, because the special XIDs reduce the - * size of the loop a little bit. But we throw in plenty of slop below, - * so it doesn't matter.) - */ - xidWrapLimit = oldest_datfrozenxid + (MaxTransactionId >> 1); - if (xidWrapLimit < FirstNormalTransactionId) - xidWrapLimit += FirstNormalTransactionId; - - /* - * We'll refuse to continue assigning XIDs in interactive mode once we get - * within 3M transactions of data loss. This leaves lots of room for the - * DBA to fool around fixing things in a standalone backend, while not - * being significant compared to total XID space. (VACUUM requires an XID - * if it truncates at wal_level!=minimal. "VACUUM (ANALYZE)", which a DBA - * might do by reflex, assigns an XID. Hence, we had better be sure - * there's lots of XIDs left...) Also, at default BLCKSZ, this leaves two - * completely-idle segments. In the event of edge-case bugs involving - * page or segment arithmetic, idle segments render the bugs unreachable - * outside of single-user mode. 
- */ - xidStopLimit = xidWrapLimit - 3000000; - if (xidStopLimit < FirstNormalTransactionId) - xidStopLimit -= FirstNormalTransactionId; - - /* - * We'll start complaining loudly when we get within 40M transactions of - * data loss. This is kind of arbitrary, but if you let your gas gauge - * get down to 2% of full, would you be looking for the next gas station? - * We need to be fairly liberal about this number because there are lots - * of scenarios where most transactions are done by automatic clients that - * won't pay attention to warnings. (No, we're not gonna make this - * configurable. If you know enough to configure it, you know enough to - * not get in this kind of trouble in the first place.) - */ - xidWarnLimit = xidWrapLimit - 40000000; - if (xidWarnLimit < FirstNormalTransactionId) - xidWarnLimit -= FirstNormalTransactionId; - /* * We'll start trying to force autovacuums when oldest_datfrozenxid gets * to be more than autovacuum_freeze_max_age transactions old. * - * Note: guc.c ensures that autovacuum_freeze_max_age is in a sane range, - * so that xidVacLimit will be well before xidWarnLimit. - * * Note: autovacuum_freeze_max_age is a PGC_POSTMASTER parameter so that * we don't have to worry about dealing with on-the-fly changes in its * value. 
It doesn't look practical to update shared state from a GUC @@ -443,18 +325,10 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) LWLockAcquire(XidGenLock, LW_EXCLUSIVE); TransamVariables->oldestXid = oldest_datfrozenxid; TransamVariables->xidVacLimit = xidVacLimit; - TransamVariables->xidWarnLimit = xidWarnLimit; - TransamVariables->xidStopLimit = xidStopLimit; - TransamVariables->xidWrapLimit = xidWrapLimit; TransamVariables->oldestXidDB = oldest_datoid; curXid = XidFromFullTransactionId(TransamVariables->nextXid); LWLockRelease(XidGenLock); - /* Log the info */ - ereport(DEBUG1, - (errmsg_internal("transaction ID wrap limit is %llu, limited by database with OID %u", - (unsigned long long) xidWrapLimit, oldest_datoid))); - /* * If past the autovacuum force point, immediately signal an autovac * request. The reason for this is that autovac only processes one @@ -465,41 +339,6 @@ SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid) if (TransactionIdFollowsOrEquals(curXid, xidVacLimit) && IsUnderPostmaster && !InRecovery) SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER); - - /* Give an immediate warning if past the wrap warn point */ - if (TransactionIdFollowsOrEquals(curXid, xidWarnLimit) && !InRecovery) - { - char *oldest_datname; - - /* - * We can be called when not inside a transaction, for example during - * StartupXLOG(). In such a case we cannot do database access, so we - * must just report the oldest DB's OID. - * - * Note: it's also possible that get_database_name fails and returns - * NULL, for example because the database just got dropped. We'll - * still warn, even though the warning might now be unnecessary. 
- */ - if (IsTransactionState()) - oldest_datname = get_database_name(oldest_datoid); - else - oldest_datname = NULL; - - if (oldest_datname) - ereport(WARNING, - (errmsg("database \"%s\" must be vacuumed within %llu transactions", - oldest_datname, - (unsigned long long) xidWrapLimit - curXid), - errhint("To avoid XID assignment failures, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - else - ereport(WARNING, - (errmsg("database with OID %u must be vacuumed within %llu transactions", - oldest_datoid, - (unsigned long long) xidWrapLimit - curXid), - errhint("To avoid XID assignment failures, execute a database-wide VACUUM in that database.\n" - "You might also need to commit or roll back old prepared transactions, or drop stale replication slots."))); - } } diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 86c5ea89b0e..83000402ea8 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -5878,6 +5878,17 @@ XactLogCommitRecord(TimestampTz commit_time, xl_subxacts.nsubxacts = nsubxacts; } + if (TransactionIdIsValid(twophase_xid)) + { + xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; + xl_twophase.xid_lo = (uint32) (twophase_xid & 0xFFFFFFFF); + xl_twophase.xid_hi = (uint32) (twophase_xid >> 32); + Assert(twophase_gid != NULL); + + if (XLogLogicalInfoActive()) + xl_xinfo.xinfo |= XACT_XINFO_HAS_GID; + } + if (nrels > 0) { xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILELOCATORS; @@ -5897,16 +5908,6 @@ XactLogCommitRecord(TimestampTz commit_time, xl_invals.nmsgs = nmsgs; } - if (TransactionIdIsValid(twophase_xid)) - { - xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; - xl_twophase.xid = twophase_xid; - Assert(twophase_gid != NULL); - - if (XLogLogicalInfoActive()) - xl_xinfo.xinfo |= XACT_XINFO_HAS_GID; - } - /* dump transaction origin information */ if (replorigin_session_origin != 
InvalidRepOriginId) { @@ -6027,6 +6028,17 @@ XactLogAbortRecord(TimestampTz abort_time, xl_subxacts.nsubxacts = nsubxacts; } + if (TransactionIdIsValid(twophase_xid)) + { + xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; + xl_twophase.xid_lo = (uint32) (twophase_xid & 0xFFFFFFFF); + xl_twophase.xid_hi = (uint32) (twophase_xid >> 32); + Assert(twophase_gid != NULL); + + if (XLogLogicalInfoActive()) + xl_xinfo.xinfo |= XACT_XINFO_HAS_GID; + } + if (nrels > 0) { xl_xinfo.xinfo |= XACT_XINFO_HAS_RELFILELOCATORS; @@ -6043,7 +6055,8 @@ XactLogAbortRecord(TimestampTz abort_time, if (TransactionIdIsValid(twophase_xid)) { xl_xinfo.xinfo |= XACT_XINFO_HAS_TWOPHASE; - xl_twophase.xid = twophase_xid; + xl_twophase.xid_lo = (uint32) (twophase_xid & 0xFFFFFFFF); + xl_twophase.xid_hi = (uint32) (twophase_xid >> 32); Assert(twophase_gid != NULL); if (XLogLogicalInfoActive()) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 4b6c694a3f7..d9b53ec68b5 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -5125,8 +5125,7 @@ BootStrapXLOG(uint32 data_checksum_version) checkPoint.PrevTimeLineID = BootstrapTimeLineID; checkPoint.fullPageWrites = fullPageWrites; checkPoint.wal_level = wal_level; - checkPoint.nextXid = - FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); + checkPoint.nextXid = FullTransactionIdFromXid(FirstNormalTransactionId); checkPoint.nextOid = FirstGenbkiObjectId; checkPoint.nextMulti = FirstMultiXactId; checkPoint.nextMultiOffset = 0; @@ -7314,7 +7313,7 @@ CreateCheckPoint(int flags) UpdateControlFile(); LWLockRelease(ControlFileLock); - /* Update shared-memory copy of checkpoint XID/epoch */ + /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptFullXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); @@ -8388,7 +8387,7 @@ xlog_redo(XLogReaderState *record) ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; 
LWLockRelease(ControlFileLock); - /* Update shared-memory copy of checkpoint XID/epoch */ + /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptFullXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); @@ -8449,7 +8448,7 @@ xlog_redo(XLogReaderState *record) ControlFile->checkPointCopy.nextXid = checkPoint.nextXid; LWLockRelease(ControlFileLock); - /* Update shared-memory copy of checkpoint XID/epoch */ + /* Update shared-memory copy of checkpoint XID/base */ SpinLockAcquire(&XLogCtl->info_lck); XLogCtl->ckptFullXid = checkPoint.nextXid; SpinLockRelease(&XLogCtl->info_lck); diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index 14d583ae7ae..4ebb7a2ebe7 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -273,6 +273,11 @@ XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags) BufferGetTag(buffer, ®buf->rlocator, ®buf->forkno, ®buf->block); regbuf->page = BufferGetPage(buffer); regbuf->flags = flags; + if (IsBufferConverted(buffer)) + { + regbuf->flags |= REGBUF_CONVERTED; + MarkBufferConverted(buffer, false); + } regbuf->rdata_tail = (XLogRecData *) ®buf->rdata_head; regbuf->rdata_len = 0; @@ -606,6 +611,8 @@ XLogRecordAssemble(RmgrId rmid, uint8 info, needs_backup = true; else if (regbuf->flags & REGBUF_NO_IMAGE) needs_backup = false; + else if (regbuf->flags & REGBUF_CONVERTED) + needs_backup = true; else if (!doPageWrites) needs_backup = false; else diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 5483d4f0dd2..08a2b832352 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -2167,23 +2167,3 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) return true; } - -#ifndef FRONTEND - -/* - * Extract the FullTransactionId from a WAL record. 
- */ -FullTransactionId -XLogRecGetFullXid(XLogReaderState *record) -{ - /* - * This function is only safe during replay, because it depends on the - * replay state. See AdvanceNextFullTransactionIdPastXid() for more. - */ - Assert(AmStartupProcess() || !IsUnderPostmaster); - - return FullTransactionIdFromAllowableAt(TransamVariables->nextXid, - XLogRecGetXid(record)); -} - -#endif diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 6db864892d0..7fe5930c528 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -115,7 +115,7 @@ static const struct typinfo TypInfo[] = { F_OIDIN, F_OIDOUT}, {"tid", TIDOID, 0, 6, false, TYPALIGN_SHORT, TYPSTORAGE_PLAIN, InvalidOid, F_TIDIN, F_TIDOUT}, - {"xid", XIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, + {"xid", XIDOID, 0, 8, FLOAT8PASSBYVAL, TYPALIGN_XID, TYPSTORAGE_PLAIN, InvalidOid, F_XIDIN, F_XIDOUT}, {"cid", CIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, F_CIDIN, F_CIDOUT}, diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index bd3554c0bfd..29cdec7d756 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -160,8 +160,8 @@ static const FormData_pg_attribute a2 = { .attlen = sizeof(TransactionId), .attnum = MinTransactionIdAttributeNumber, .atttypmod = -1, - .attbyval = true, - .attalign = TYPALIGN_INT, + .attbyval = FLOAT8PASSBYVAL, + .attalign = TYPALIGN_XID, .attstorage = TYPSTORAGE_PLAIN, .attnotnull = true, .attislocal = true, @@ -186,8 +186,8 @@ static const FormData_pg_attribute a4 = { .attlen = sizeof(TransactionId), .attnum = MaxTransactionIdAttributeNumber, .atttypmod = -1, - .attbyval = true, - .attalign = TYPALIGN_INT, + .attbyval = FLOAT8PASSBYVAL, + .attalign = TYPALIGN_XID, .attstorage = TYPSTORAGE_PLAIN, .attnotnull = true, .attislocal = true, diff --git a/src/backend/catalog/pg_inherits.c b/src/backend/catalog/pg_inherits.c index 929bb53b620..3b613971e3b 100644 --- 
a/src/backend/catalog/pg_inherits.c +++ b/src/backend/catalog/pg_inherits.c @@ -145,7 +145,7 @@ find_inheritance_children_extended(Oid parentrelId, bool omit_detached, TransactionId xmin; Snapshot snap; - xmin = HeapTupleHeaderGetXmin(inheritsTuple->t_data); + xmin = HeapTupleGetXmin(inheritsTuple); snap = GetActiveSnapshot(); if (!XidInMVCCSnapshot(xmin, snap)) diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 4bd37d5beb5..be4d8bb4f3e 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -184,7 +184,7 @@ typedef struct AsyncQueueEntry } AsyncQueueEntry; /* Currently, no field of AsyncQueueEntry requires more than int alignment */ -#define QUEUEALIGN(len) INTALIGN(len) +#define QUEUEALIGN(len) TYPEALIGN(8, len) #define AsyncQueueEntryEmptySize (offsetof(AsyncQueueEntry, data) + 2) diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 5fbbcdaabb1..4b001258655 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -129,7 +129,8 @@ static void CreateDatabaseUsingWalLog(Oid src_dboid, Oid dst_dboid, Oid src_tsid static List *ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath); static List *ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid, char *srcpath, - List *rlocatorlist, Snapshot snapshot); + List *rlocatorlist, Snapshot snapshot, + bool is_toast); static CreateDBRelInfo *ScanSourceDatabasePgClassTuple(HeapTupleData *tuple, Oid tbid, Oid dbid, char *srcpath); @@ -307,9 +308,10 @@ ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath) } /* Append relevant pg_class tuples for current page to rlocatorlist. 
*/ + /* No toast is expected in sys tables */ rlocatorlist = ScanSourceDatabasePgClassPage(page, buf, tbid, dbid, srcpath, rlocatorlist, - snapshot); + snapshot, false); UnlockReleaseBuffer(buf); } @@ -328,7 +330,7 @@ ScanSourceDatabasePgClass(Oid tbid, Oid dbid, char *srcpath) static List * ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid, char *srcpath, List *rlocatorlist, - Snapshot snapshot) + Snapshot snapshot, bool is_toast) { BlockNumber blkno = BufferGetBlockNumber(buf); OffsetNumber offnum; @@ -358,6 +360,7 @@ ScanSourceDatabasePgClassPage(Page page, Buffer buf, Oid tbid, Oid dbid, tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_len = ItemIdGetLength(itemid); tuple.t_tableOid = RelationRelationId; + HeapTupleCopyXidsFromPage(buf, &tuple, page, is_toast); /* Skip tuples that are not visible to this snapshot. */ if (HeapTupleSatisfiesVisibility(&tuple, snapshot, buf)) diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 89cc83e8843..06869c694ef 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -1780,7 +1780,7 @@ DefineIndex(Oid tableId, set_indexsafe_procflags(); /* We should now definitely not be advertising any xmin. */ - Assert(MyProc->xmin == InvalidTransactionId); + Assert(pg_atomic_read_u64(&MyProc->xmin) == InvalidTransactionId); /* * The index is now valid in the sense that it contains all currently @@ -4580,8 +4580,8 @@ set_indexsafe_procflags(void) * This should only be called before installing xid or xmin in MyProc; * otherwise, concurrent processes could see an Xmin that moves backwards. 
*/ - Assert(MyProc->xid == InvalidTransactionId && - MyProc->xmin == InvalidTransactionId); + Assert(pg_atomic_read_u64(&MyProc->xid) == InvalidTransactionId && + pg_atomic_read_u64(&MyProc->xmin) == InvalidTransactionId); LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); MyProc->statusFlags |= PROC_IN_SAFE_IC; diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 4b7c5113aab..de722248999 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -49,6 +49,25 @@ #include "utils/syscache.h" #include "utils/varlena.h" +static inline void +SeqTupleSetXmin(HeapTuple htup, TransactionId xid) +{ + htup->t_xmin = xid; + htup->t_data->t_choice.t_heap.t_xmin = xid; +} + +static inline void +SeqTupleSetXmax(HeapTuple htup, TransactionId xid) +{ + htup->t_xmax = xid; + htup->t_data->t_choice.t_heap.t_xmax = xid; +} + +static inline TransactionId +SeqTupleHeaderGetRawXmax(HeapTupleHeader htup) +{ + return htup->t_choice.t_heap.t_xmax; +} /* * We don't want to log each fetching of a value from a sequence, @@ -384,10 +403,10 @@ fill_seq_fork_with_data(Relation rel, HeapTuple tuple, ForkNumber forkNum) * because if the current transaction aborts, no other xact will ever * examine the sequence tuple anyway.
*/ - HeapTupleHeaderSetXmin(tuple->t_data, FrozenTransactionId); - HeapTupleHeaderSetXminFrozen(tuple->t_data); + SeqTupleSetXmin(tuple, FrozenTransactionId); + HeapTupleHeaderStoreXminFrozen(tuple->t_data); HeapTupleHeaderSetCmin(tuple->t_data, FirstCommandId); - HeapTupleHeaderSetXmax(tuple->t_data, InvalidTransactionId); + SeqTupleSetXmax(tuple, InvalidTransactionId); tuple->t_data->t_infomask |= HEAP_XMAX_INVALID; ItemPointerSet(&tuple->t_data->t_ctid, 0, FirstOffsetNumber); @@ -1210,6 +1229,7 @@ read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) /* Note we currently only bother to set these two fields of *seqdatatuple */ seqdatatuple->t_data = (HeapTupleHeader) PageGetItem(page, lp); seqdatatuple->t_len = ItemIdGetLength(lp); + HeapTupleCopyHeaderXids(seqdatatuple); /* * Previous releases of Postgres neglected to prevent SELECT FOR UPDATE on @@ -1220,9 +1240,9 @@ read_seq_tuple(Relation rel, Buffer *buf, HeapTuple seqdatatuple) * this again if the update gets lost. */ Assert(!(seqdatatuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); - if (HeapTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId) + if (SeqTupleHeaderGetRawXmax(seqdatatuple->t_data) != InvalidTransactionId) { - HeapTupleHeaderSetXmax(seqdatatuple->t_data, InvalidTransactionId); + SeqTupleSetXmax(seqdatatuple, InvalidTransactionId); seqdatatuple->t_data->t_infomask &= ~HEAP_XMAX_COMMITTED; seqdatatuple->t_data->t_infomask |= HEAP_XMAX_INVALID; MarkBufferDirtyHint(*buf, true); diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 452241d637f..db459a13d12 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -70,12 +70,12 @@ /* * GUC parameters */ -int vacuum_freeze_min_age; -int vacuum_freeze_table_age; -int vacuum_multixact_freeze_min_age; -int vacuum_multixact_freeze_table_age; -int vacuum_failsafe_age; -int vacuum_multixact_failsafe_age; +int64 vacuum_freeze_min_age; +int64 vacuum_freeze_table_age; +int64 
vacuum_multixact_freeze_min_age; +int64 vacuum_multixact_freeze_table_age; +int64 vacuum_failsafe_age; +int64 vacuum_multixact_failsafe_age; double vacuum_max_eager_freeze_failure_rate; bool track_cost_delay_timing; bool vacuum_truncate; @@ -1102,7 +1102,7 @@ bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, struct VacuumCutoffs *cutoffs) { - int freeze_min_age, + int64 freeze_min_age, multixact_freeze_min_age, freeze_table_age, multixact_freeze_table_age, @@ -1524,6 +1524,9 @@ vac_update_relstats(Relation relation, futurexid = false; if (frozenxid_updated) *frozenxid_updated = false; + + Assert(TransactionIdPrecedesOrEquals(frozenxid, ReadNextTransactionId())); + if (TransactionIdIsNormal(frozenxid) && oldfrozenxid != frozenxid) { bool update = false; @@ -1547,6 +1550,9 @@ vac_update_relstats(Relation relation, futuremxid = false; if (minmulti_updated) *minmulti_updated = false; + + Assert(MultiXactIdPrecedesOrEquals(minmulti, ReadNextMultiXactId())); + if (MultiXactIdIsValid(minmulti) && oldminmulti != minmulti) { bool update = false; diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 8a72b5e70a4..b789240fe1b 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -3874,6 +3874,7 @@ ExecEvalFieldStoreDeForm(ExprState *state, ExprEvalStep *op, ExprContext *econte tmptup.t_len = HeapTupleHeaderGetDatumLength(tuphdr); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tmptup); tmptup.t_data = tuphdr; /* diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index 8e02d68824f..33ad30210fc 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -390,7 +390,7 @@ tts_heap_is_current_xact_tuple(TupleTableSlot *slot) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("don't have a storage tuple in this context"))); - xmin = 
HeapTupleHeaderGetRawXmin(hslot->tuple->t_data); + xmin = HeapTupleGetRawXmin(hslot->tuple); return TransactionIdIsCurrentTransactionId(xmin); } @@ -795,7 +795,7 @@ tts_buffer_is_current_xact_tuple(TupleTableSlot *slot) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("don't have a storage tuple in this context"))); - xmin = HeapTupleHeaderGetRawXmin(bslot->base.tuple->t_data); + xmin = HeapTupleGetRawXmin(bslot->base.tuple); return TransactionIdIsCurrentTransactionId(xmin); } diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 772c86e70e9..a6813de0bbd 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -1109,6 +1109,7 @@ GetAttributeByName(HeapTupleHeader tuple, const char *attname, bool *isNull) tmptup.t_len = HeapTupleHeaderGetDatumLength(tuple); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tmptup); tmptup.t_data = tuple; result = heap_getattr(&tmptup, diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 87c820276a8..19dee7fa637 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -4299,6 +4299,7 @@ ExecModifyTable(PlanState *pstate) HeapTupleHeaderGetDatumLength(oldtupdata.t_data); ItemPointerSetInvalid(&(oldtupdata.t_self)); /* Historically, view triggers see invalid t_tableOid. */ + HeapTupleCopyHeaderXids(&oldtupdata); oldtupdata.t_tableOid = (relkind == RELKIND_VIEW) ? 
InvalidOid : RelationGetRelid(resultRelInfo->ri_RelationDesc); diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c index 3288396def3..3d449369efd 100644 --- a/src/backend/executor/spi.c +++ b/src/backend/executor/spi.c @@ -1157,6 +1157,7 @@ SPI_modifytuple(Relation rel, HeapTuple tuple, int natts, int *attnum, mtuple->t_data->t_ctid = tuple->t_data->t_ctid; mtuple->t_self = tuple->t_self; mtuple->t_tableOid = tuple->t_tableOid; + HeapTupleCopyXids(mtuple, tuple); } else { diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index f6229089cd1..e8a6d9dc9b8 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -1030,14 +1030,14 @@ _read${n}(void) elsif ($t eq 'uint32' || $t eq 'bits32' || $t eq 'BlockNumber' - || $t eq 'Index' - || $t eq 'SubTransactionId') + || $t eq 'Index') { print $off "\tWRITE_UINT_FIELD($f);\n"; print $rff "\tREAD_UINT_FIELD($f);\n" unless $no_read; } elsif ($t eq 'uint64' - || $t eq 'AclMode') + || $t eq 'AclMode' + || $t eq 'SubTransactionId') { print $off "\tWRITE_UINT64_FIELD($f);\n"; print $rff "\tREAD_UINT64_FIELD($f);\n" unless $no_read; diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 0489ad36644..77c10fd8f6f 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -272,7 +272,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, * src/backend/access/heap/README.HOT for discussion. 
*/ if (index->indcheckxmin && - !TransactionIdPrecedes(HeapTupleHeaderGetXmin(indexRelation->rd_indextuple->t_data), + !TransactionIdPrecedes(HeapTupleGetXmin(indexRelation->rd_indextuple), TransactionXmin)) { root->glob->transientPlan = true; diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 4ca3acdccd8..a600afd323f 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -127,8 +127,8 @@ int64 autovacuum_vac_ins_thresh; double autovacuum_vac_ins_scale; int64 autovacuum_anl_thresh; double autovacuum_anl_scale; -int autovacuum_freeze_max_age; -int autovacuum_multixact_freeze_max_age; +int64 autovacuum_freeze_max_age; +int64 autovacuum_multixact_freeze_max_age; double autovacuum_vac_cost_delay; int autovacuum_vac_cost_limit; @@ -159,10 +159,10 @@ static TransactionId recentXid; static MultiXactId recentMulti; /* Default freeze ages to use for autovacuum (varies by database) */ -static int default_freeze_min_age; -static int default_freeze_table_age; -static int default_multixact_freeze_min_age; -static int default_multixact_freeze_table_age; +static int64 default_freeze_min_age; +static int64 default_freeze_table_age; +static int64 default_multixact_freeze_min_age; +static int64 default_multixact_freeze_table_age; /* Memory context for long-lived data */ static MemoryContext AutovacMemCxt; @@ -332,15 +332,15 @@ static void FreeWorkerInfo(int code, Datum arg); static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map, TupleDesc pg_class_desc, - int effective_multixact_freeze_max_age); + int64 effective_multixact_freeze_max_age); static void recheck_relation_needs_vacanalyze(Oid relid, AutoVacOpts *avopts, Form_pg_class classForm, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound); static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, 
PgStat_StatTabEntry *tabentry, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound); static void autovacuum_do_vac_analyze(autovac_table *tab, @@ -1129,11 +1129,10 @@ do_start_worker(void) * particular tables, but not loosened.) */ recentXid = ReadNextTransactionId(); - xidForceLimit = recentXid - autovacuum_freeze_max_age; - /* ensure it's a "normal" XID, else TransactionIdPrecedes misbehaves */ - /* this can cause the limit to go backwards by 3, but that's OK */ - if (xidForceLimit < FirstNormalTransactionId) - xidForceLimit -= FirstNormalTransactionId; + if (recentXid > FirstNormalTransactionId + autovacuum_freeze_max_age) + xidForceLimit = recentXid - autovacuum_freeze_max_age; + else + xidForceLimit = FirstNormalTransactionId; /* Also determine the oldest datminmxid we will consider. */ recentMulti = ReadNextMultiXactId(); @@ -1896,7 +1895,7 @@ do_autovacuum(void) BufferAccessStrategy bstrategy; ScanKeyData key; TupleDesc pg_class_desc; - int effective_multixact_freeze_max_age; + int64 effective_multixact_freeze_max_age; bool did_vacuum = false; bool found_concurrent_worker = false; int i; @@ -2721,7 +2720,7 @@ extract_autovac_opts(HeapTuple tup, TupleDesc pg_class_desc) static autovac_table * table_recheck_autovac(Oid relid, HTAB *table_toast_map, TupleDesc pg_class_desc, - int effective_multixact_freeze_max_age) + int64 effective_multixact_freeze_max_age) { Form_pg_class classForm; HeapTuple classTup; @@ -2760,10 +2759,10 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, /* OK, it needs something done */ if (doanalyze || dovacuum) { - int freeze_min_age; - int freeze_table_age; - int multixact_freeze_min_age; - int multixact_freeze_table_age; + int64 freeze_min_age; + int64 freeze_table_age; + int64 multixact_freeze_min_age; + int64 multixact_freeze_table_age; int log_min_duration; /* @@ -2868,7 +2867,7 @@ static void recheck_relation_needs_vacanalyze(Oid relid, 
AutoVacOpts *avopts, Form_pg_class classForm, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, bool *dovacuum, bool *doanalyze, bool *wraparound) @@ -2932,7 +2931,7 @@ relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, Form_pg_class classForm, PgStat_StatTabEntry *tabentry, - int effective_multixact_freeze_max_age, + int64 effective_multixact_freeze_max_age, /* output params below */ bool *dovacuum, bool *doanalyze, @@ -2961,8 +2960,8 @@ relation_needs_vacanalyze(Oid relid, anltuples; /* freeze parameters */ - int freeze_max_age; - int multixact_freeze_max_age; + int64 freeze_max_age; + int64 multixact_freeze_max_age; TransactionId xidForceLimit; TransactionId relfrozenxid; MultiXactId multiForceLimit; @@ -3018,9 +3017,12 @@ relation_needs_vacanalyze(Oid relid, av_enabled = (relopts ? relopts->enabled : true); /* Force vacuum if table is at risk of wraparound */ - xidForceLimit = recentXid - freeze_max_age; - if (xidForceLimit < FirstNormalTransactionId) - xidForceLimit -= FirstNormalTransactionId; + + if (recentXid > FirstNormalTransactionId + freeze_max_age) + xidForceLimit = recentXid - freeze_max_age; + else + xidForceLimit = FirstNormalTransactionId; + relfrozenxid = classForm->relfrozenxid; force_vacuum = (TransactionIdIsNormal(relfrozenxid) && TransactionIdPrecedes(relfrozenxid, xidForceLimit)); @@ -3028,9 +3030,11 @@ relation_needs_vacanalyze(Oid relid, { MultiXactId relminmxid = classForm->relminmxid; - multiForceLimit = recentMulti - multixact_freeze_max_age; - if (multiForceLimit < FirstMultiXactId) - multiForceLimit -= FirstMultiXactId; + if (recentMulti > FirstMultiXactId + multixact_freeze_max_age) + multiForceLimit = recentMulti - multixact_freeze_max_age; + else + multiForceLimit = FirstMultiXactId; + force_vacuum = MultiXactIdIsValid(relminmxid) && MultiXactIdPrecedes(relminmxid, multiForceLimit); } diff --git a/src/backend/replication/logical/conflict.c b/src/backend/replication/logical/conflict.c index 
f1e92f2fc1a..2cc14a344bc 100644 --- a/src/backend/replication/logical/conflict.c +++ b/src/backend/replication/logical/conflict.c @@ -220,11 +220,11 @@ errdetail_apply_conflict(EState *estate, ResultRelInfo *relinfo, if (localts) { if (localorigin == InvalidRepOriginId) - appendStringInfo(&err_detail, _("Key already exists in unique index \"%s\", modified locally in transaction %u at %s."), + appendStringInfo(&err_detail, _("Key already exists in unique index \"%s\", modified locally in transaction %lu at %s."), get_rel_name(indexoid), localxmin, timestamptz_to_str(localts)); else if (replorigin_by_oid(localorigin, true, &origin_name)) - appendStringInfo(&err_detail, _("Key already exists in unique index \"%s\", modified by origin \"%s\" in transaction %u at %s."), + appendStringInfo(&err_detail, _("Key already exists in unique index \"%s\", modified by origin \"%s\" in transaction %lu at %s."), get_rel_name(indexoid), origin_name, localxmin, timestamptz_to_str(localts)); @@ -236,27 +236,27 @@ errdetail_apply_conflict(EState *estate, ResultRelInfo *relinfo, * manually dropped by the user. 
*/ else - appendStringInfo(&err_detail, _("Key already exists in unique index \"%s\", modified by a non-existent origin in transaction %u at %s."), + appendStringInfo(&err_detail, _("Key already exists in unique index \"%s\", modified by a non-existent origin in transaction %lu at %s."), get_rel_name(indexoid), localxmin, timestamptz_to_str(localts)); } else - appendStringInfo(&err_detail, _("Key already exists in unique index \"%s\", modified in transaction %u."), + appendStringInfo(&err_detail, _("Key already exists in unique index \"%s\", modified in transaction %lu."), get_rel_name(indexoid), localxmin); break; case CT_UPDATE_ORIGIN_DIFFERS: if (localorigin == InvalidRepOriginId) - appendStringInfo(&err_detail, _("Updating the row that was modified locally in transaction %u at %s."), + appendStringInfo(&err_detail, _("Updating the row that was modified locally in transaction %lu at %s."), localxmin, timestamptz_to_str(localts)); else if (replorigin_by_oid(localorigin, true, &origin_name)) - appendStringInfo(&err_detail, _("Updating the row that was modified by a different origin \"%s\" in transaction %u at %s."), + appendStringInfo(&err_detail, _("Updating the row that was modified by a different origin \"%s\" in transaction %lu at %s."), origin_name, localxmin, timestamptz_to_str(localts)); /* The origin that modified this row has been removed. 
*/ else - appendStringInfo(&err_detail, _("Updating the row that was modified by a non-existent origin in transaction %u at %s."), + appendStringInfo(&err_detail, _("Updating the row that was modified by a non-existent origin in transaction %lu at %s."), localxmin, timestamptz_to_str(localts)); break; @@ -267,15 +267,15 @@ errdetail_apply_conflict(EState *estate, ResultRelInfo *relinfo, case CT_DELETE_ORIGIN_DIFFERS: if (localorigin == InvalidRepOriginId) - appendStringInfo(&err_detail, _("Deleting the row that was modified locally in transaction %u at %s."), + appendStringInfo(&err_detail, _("Deleting the row that was modified locally in transaction %lu at %s."), localxmin, timestamptz_to_str(localts)); else if (replorigin_by_oid(localorigin, true, &origin_name)) - appendStringInfo(&err_detail, _("Deleting the row that was modified by a different origin \"%s\" in transaction %u at %s."), + appendStringInfo(&err_detail, _("Deleting the row that was modified by a different origin \"%s\" in transaction %lu at %s."), origin_name, localxmin, timestamptz_to_str(localts)); /* The origin that modified this row has been removed. 
*/ else - appendStringInfo(&err_detail, _("Deleting the row that was modified by a non-existent origin in transaction %u at %s."), + appendStringInfo(&err_detail, _("Deleting the row that was modified by a non-existent origin in transaction %lu at %s."), localxmin, timestamptz_to_str(localts)); break; diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index 78f9a0a11c4..63d115a6eb8 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -896,8 +896,14 @@ DecodeInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) xl_heap_insert *xlrec; ReorderBufferChange *change; RelFileLocator target_locator; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(r); - xlrec = (xl_heap_insert *) XLogRecGetData(r); + /* Bypass pd_xid_base and pd_multi_base */ + if (isinit) + rec_data += sizeof(TransactionId) * 2; + + xlrec = (xl_heap_insert *) rec_data; /* * Ignore insert records without new tuples (this does happen when @@ -953,8 +959,13 @@ DecodeUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) ReorderBufferChange *change; char *data; RelFileLocator target_locator; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(r); - xlrec = (xl_heap_update *) XLogRecGetData(r); + /* Bypass pd_xid_base and pd_multi_base */ + if (isinit) + rec_data += sizeof(TransactionId) * 2; + xlrec = (xl_heap_update *) rec_data; /* only interested in our database */ XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL); @@ -1114,8 +1125,13 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) char *tupledata; Size tuplelen; RelFileLocator rlocator; + bool isinit = (XLogRecGetInfo(r) & XLOG_HEAP_INIT_PAGE) != 0; + Pointer rec_data = (Pointer) XLogRecGetData(r); - xlrec = (xl_heap_multi_insert *) XLogRecGetData(r); + /* Bypass pd_xid_base and pd_multi_base */ + if 
(isinit) + rec_data += sizeof(TransactionId) * 2; + xlrec = (xl_heap_multi_insert *) rec_data; /* * Ignore insert records without new tuples. This happens when a @@ -1172,6 +1188,7 @@ DecodeMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) * We can only figure this out after reassembling the transactions. */ tuple->t_tableOid = InvalidOid; + HeapTupleSetZeroXids(tuple); tuple->t_len = datalen + SizeofHeapTupleHeader; @@ -1261,6 +1278,7 @@ DecodeXLogTuple(char *data, Size len, HeapTuple tuple) /* we can only figure this out after reassembling the transactions */ tuple->t_tableOid = InvalidOid; + HeapTupleSetZeroXids(tuple); /* data is not stored aligned, copy to aligned storage */ memcpy(&xlhdr, data, SizeOfHeapHeader); diff --git a/src/backend/replication/logical/proto.c b/src/backend/replication/logical/proto.c index 1a352b542dc..7190d488a2b 100644 --- a/src/backend/replication/logical/proto.c +++ b/src/backend/replication/logical/proto.c @@ -53,7 +53,7 @@ logicalrep_write_begin(StringInfo out, ReorderBufferTXN *txn) /* fixed fields */ pq_sendint64(out, txn->final_lsn); pq_sendint64(out, txn->xact_time.commit_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); } /* @@ -67,7 +67,7 @@ logicalrep_read_begin(StringInfo in, LogicalRepBeginData *begin_data) if (begin_data->final_lsn == InvalidXLogRecPtr) elog(ERROR, "final_lsn not set in begin message"); begin_data->committime = pq_getmsgint64(in); - begin_data->xid = pq_getmsgint(in, 4); + begin_data->xid = pq_getmsgint64(in); } @@ -121,7 +121,7 @@ logicalrep_write_begin_prepare(StringInfo out, ReorderBufferTXN *txn) pq_sendint64(out, txn->final_lsn); pq_sendint64(out, txn->end_lsn); pq_sendint64(out, txn->xact_time.prepare_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); /* send gid */ pq_sendstring(out, txn->gid); @@ -141,7 +141,7 @@ logicalrep_read_begin_prepare(StringInfo in, LogicalRepPreparedTxnData *begin_da if (begin_data->end_lsn == InvalidXLogRecPtr) 
elog(ERROR, "end_lsn not set in begin prepare message"); begin_data->prepare_time = pq_getmsgint64(in); - begin_data->xid = pq_getmsgint(in, 4); + begin_data->xid = pq_getmsgint64(in); /* read gid (copy it into a pre-allocated buffer) */ strlcpy(begin_data->gid, pq_getmsgstring(in), sizeof(begin_data->gid)); @@ -174,7 +174,7 @@ logicalrep_write_prepare_common(StringInfo out, LogicalRepMsgType type, pq_sendint64(out, prepare_lsn); pq_sendint64(out, txn->end_lsn); pq_sendint64(out, txn->xact_time.prepare_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); /* send gid */ pq_sendstring(out, txn->gid); @@ -213,7 +213,7 @@ logicalrep_read_prepare_common(StringInfo in, char *msgtype, if (prepare_data->end_lsn == InvalidXLogRecPtr) elog(ERROR, "end_lsn is not set in %s message", msgtype); prepare_data->prepare_time = pq_getmsgint64(in); - prepare_data->xid = pq_getmsgint(in, 4); + prepare_data->xid = pq_getmsgint64(in); if (prepare_data->xid == InvalidTransactionId) elog(ERROR, "invalid two-phase transaction ID in %s message", msgtype); @@ -254,7 +254,7 @@ logicalrep_write_commit_prepared(StringInfo out, ReorderBufferTXN *txn, pq_sendint64(out, commit_lsn); pq_sendint64(out, txn->end_lsn); pq_sendint64(out, txn->xact_time.commit_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); /* send gid */ pq_sendstring(out, txn->gid); @@ -280,7 +280,7 @@ logicalrep_read_commit_prepared(StringInfo in, LogicalRepCommitPreparedTxnData * if (prepare_data->end_lsn == InvalidXLogRecPtr) elog(ERROR, "end_lsn is not set in commit prepared message"); prepare_data->commit_time = pq_getmsgint64(in); - prepare_data->xid = pq_getmsgint(in, 4); + prepare_data->xid = pq_getmsgint64(in); /* read gid (copy it into a pre-allocated buffer) */ strlcpy(prepare_data->gid, pq_getmsgstring(in), sizeof(prepare_data->gid)); @@ -312,7 +312,7 @@ logicalrep_write_rollback_prepared(StringInfo out, ReorderBufferTXN *txn, pq_sendint64(out, txn->end_lsn); pq_sendint64(out, 
prepare_time); pq_sendint64(out, txn->xact_time.commit_time); - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); /* send gid */ pq_sendstring(out, txn->gid); @@ -340,7 +340,7 @@ logicalrep_read_rollback_prepared(StringInfo in, elog(ERROR, "rollback_end_lsn is not set in rollback prepared message"); rollback_data->prepare_time = pq_getmsgint64(in); rollback_data->rollback_time = pq_getmsgint64(in); - rollback_data->xid = pq_getmsgint(in, 4); + rollback_data->xid = pq_getmsgint64(in); /* read gid (copy it into a pre-allocated buffer) */ strlcpy(rollback_data->gid, pq_getmsgstring(in), sizeof(rollback_data->gid)); @@ -409,7 +409,7 @@ logicalrep_write_insert(StringInfo out, TransactionId xid, Relation rel, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); /* use Oid as relation identifier */ pq_sendint32(out, RelationGetRelid(rel)); @@ -460,7 +460,7 @@ logicalrep_write_update(StringInfo out, TransactionId xid, Relation rel, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); /* use Oid as relation identifier */ pq_sendint32(out, RelationGetRelid(rel)); @@ -538,7 +538,7 @@ logicalrep_write_delete(StringInfo out, TransactionId xid, Relation rel, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); /* use Oid as relation identifier */ pq_sendint32(out, RelationGetRelid(rel)); @@ -593,7 +593,7 @@ logicalrep_write_truncate(StringInfo out, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); pq_sendint32(out, nrelids); @@ -651,7 +651,7 @@ logicalrep_write_message(StringInfo out, TransactionId xid, XLogRecPtr lsn, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, 
xid); + pq_sendint64(out, xid); pq_sendint8(out, flags); pq_sendint64(out, lsn); @@ -674,7 +674,7 @@ logicalrep_write_rel(StringInfo out, TransactionId xid, Relation rel, /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); /* use Oid as relation identifier */ pq_sendint32(out, RelationGetRelid(rel)); @@ -730,7 +730,7 @@ logicalrep_write_typ(StringInfo out, TransactionId xid, Oid typoid) /* transaction ID (if not valid, we're not streaming) */ if (TransactionIdIsValid(xid)) - pq_sendint32(out, xid); + pq_sendint64(out, xid); tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(basetypoid)); if (!HeapTupleIsValid(tup)) @@ -1066,7 +1066,7 @@ logicalrep_write_stream_start(StringInfo out, Assert(TransactionIdIsValid(xid)); /* transaction ID (we're starting to stream, so must be valid) */ - pq_sendint32(out, xid); + pq_sendint64(out, xid); /* 1 if this is the first streaming segment for this xid */ pq_sendbyte(out, first_segment ? 
1 : 0); @@ -1082,7 +1082,7 @@ logicalrep_read_stream_start(StringInfo in, bool *first_segment) Assert(first_segment); - xid = pq_getmsgint(in, 4); + xid = pq_getmsgint64(in); *first_segment = (pq_getmsgbyte(in) == 1); return xid; @@ -1111,7 +1111,7 @@ logicalrep_write_stream_commit(StringInfo out, ReorderBufferTXN *txn, Assert(TransactionIdIsValid(txn->xid)); /* transaction ID */ - pq_sendint32(out, txn->xid); + pq_sendint64(out, txn->xid); /* send the flags field (unused for now) */ pq_sendbyte(out, flags); @@ -1131,7 +1131,7 @@ logicalrep_read_stream_commit(StringInfo in, LogicalRepCommitData *commit_data) TransactionId xid; uint8 flags; - xid = pq_getmsgint(in, 4); + xid = pq_getmsgint64(in); /* read flags (unused for now) */ flags = pq_getmsgbyte(in); @@ -1164,8 +1164,8 @@ logicalrep_write_stream_abort(StringInfo out, TransactionId xid, Assert(TransactionIdIsValid(xid) && TransactionIdIsValid(subxid)); /* transaction ID */ - pq_sendint32(out, xid); - pq_sendint32(out, subxid); + pq_sendint64(out, xid); + pq_sendint64(out, subxid); if (write_abort_info) { @@ -1187,8 +1187,8 @@ logicalrep_read_stream_abort(StringInfo in, { Assert(abort_data); - abort_data->xid = pq_getmsgint(in, 4); - abort_data->subxid = pq_getmsgint(in, 4); + abort_data->xid = pq_getmsgint64(in); + abort_data->subxid = pq_getmsgint64(in); if (read_abort_info) { diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index a6b5df2612f..2d145b9ac37 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -5333,8 +5333,12 @@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot) TransactionId f_mapped_xid; TransactionId f_create_xid; XLogRecPtr f_lsn; - uint32 f_hi, - f_lo; + uint32 f_lsn_hi, + f_lsn_lo, + f_mapped_xid_hi, + f_mapped_xid_lo, + f_create_xid_hi, + f_create_xid_lo; RewriteMappingFile *f; if (strcmp(mapping_de->d_name, ".") == 0 || @@ -5346,11 +5350,14 
@@ UpdateLogicalMappings(HTAB *tuplecid_data, Oid relid, Snapshot snapshot) continue; if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, - &f_dboid, &f_relid, &f_hi, &f_lo, - &f_mapped_xid, &f_create_xid) != 6) + &f_dboid, &f_relid, &f_lsn_hi, &f_lsn_lo, + &f_mapped_xid_hi, &f_mapped_xid_lo, + &f_create_xid_hi, &f_create_xid_lo) != 8) elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name); - f_lsn = ((uint64) f_hi) << 32 | f_lo; + f_lsn = ((uint64) f_lsn_hi) << 32 | f_lsn_lo; + f_mapped_xid = ((uint64) f_mapped_xid_hi) << 32 | f_mapped_xid_lo; + f_create_xid = ((uint64) f_create_xid_hi) << 32 | f_create_xid_lo; /* mapping for another database */ if (f_dboid != dboid) diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c index 2c0a7439be4..1ea13073534 100644 --- a/src/backend/replication/logical/slotsync.c +++ b/src/backend/replication/logical/slotsync.c @@ -212,7 +212,7 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid, ereport(slot->data.persistency == RS_TEMPORARY ? 
LOG : DEBUG1, errmsg("could not synchronize replication slot \"%s\" because remote slot precedes local slot", remote_slot->name), - errdetail("The remote slot has LSN %X/%X and catalog xmin %u, but the local slot has LSN %X/%X and catalog xmin %u.", + errdetail("The remote slot has LSN %X/%X and catalog xmin %lu, but the local slot has LSN %X/%X and catalog xmin %lu.", LSN_FORMAT_ARGS(remote_slot->restart_lsn), remote_slot->catalog_xmin, LSN_FORMAT_ARGS(slot->data.restart_lsn), diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 7e2ad310153..eba8b2567cc 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -461,7 +461,7 @@ SnapBuildInitialSnapshot(SnapBuild *builder) elog(ERROR, "cannot build an initial slot snapshot, not all transactions are monitored anymore"); /* so we don't overwrite the existing value */ - if (TransactionIdIsValid(MyProc->xmin)) + if (TransactionIdIsValid(pg_atomic_read_u64(&MyProc->xmin))) elog(ERROR, "cannot build an initial slot snapshot when MyProc->xmin already is valid"); snap = SnapBuildBuildSnapshot(builder); @@ -483,7 +483,7 @@ SnapBuildInitialSnapshot(SnapBuild *builder) elog(ERROR, "cannot build an initial slot snapshot as oldest safe xid %llu follows snapshot's xmin %llu", (unsigned long long) safeXid, (unsigned long long) snap->xmin); - MyProc->xmin = snap->xmin; + pg_atomic_write_u64(&MyProc->xmin, snap->xmin); /* allocate in transaction context */ newxip = (TransactionId *) diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 42fe46d912b..90b93c8e188 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -581,7 +581,7 @@ handle_streamed_transaction(LogicalRepMsgType action, StringInfo s) * We should have received XID of the subxact as the first part of the * message, so extract it. 
*/ - current_xid = pq_getmsgint(s, 4); + current_xid = pq_getmsgint64(s); if (!TransactionIdIsValid(current_xid)) ereport(ERROR, diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 098d7e78eff..583f957e69f 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -1192,10 +1192,6 @@ static void XLogWalRcvSendHSFeedback(bool immed) { TimestampTz now; - FullTransactionId nextFullXid; - TransactionId nextXid; - uint32 xmin_epoch, - catalog_xmin_epoch; TransactionId xmin, catalog_xmin; @@ -1247,31 +1243,15 @@ XLogWalRcvSendHSFeedback(bool immed) catalog_xmin = InvalidTransactionId; } - /* - * Get epoch and adjust if nextXid and oldestXmin are different sides of - * the epoch boundary. - */ - nextFullXid = ReadNextFullTransactionId(); - nextXid = XidFromFullTransactionId(nextFullXid); - xmin_epoch = EpochFromFullTransactionId(nextFullXid); - catalog_xmin_epoch = xmin_epoch; - if (nextXid < xmin) - xmin_epoch--; - if (nextXid < catalog_xmin) - catalog_xmin_epoch--; - - elog(DEBUG2, "sending hot standby feedback xmin %llu epoch %u catalog_xmin %llu catalog_xmin_epoch %u", - (unsigned long long) xmin, xmin_epoch, - (unsigned long long) catalog_xmin, catalog_xmin_epoch); + elog(DEBUG2, "sending hot standby feedback xmin %llu catalog_xmin %llu", + (unsigned long long) xmin, (unsigned long long) catalog_xmin); /* Construct the message and send it. 
*/ resetStringInfo(&reply_message); pq_sendbyte(&reply_message, 'h'); pq_sendint64(&reply_message, GetCurrentTimestamp()); - pq_sendint32(&reply_message, xmin); - pq_sendint32(&reply_message, xmin_epoch); - pq_sendint32(&reply_message, catalog_xmin); - pq_sendint32(&reply_message, catalog_xmin_epoch); + pq_sendint64(&reply_message, xmin); + pq_sendint64(&reply_message, catalog_xmin); walrcv_send(wrconn, reply_message.data, reply_message.len); if (TransactionIdIsValid(xmin) || TransactionIdIsValid(catalog_xmin)) primary_has_standby_xmin = true; diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index a6848631d58..61899382d58 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -268,7 +268,6 @@ static void WalSndUpdateProgress(LogicalDecodingContext *ctx, XLogRecPtr lsn, Tr static XLogRecPtr WalSndWaitForWal(XLogRecPtr loc); static void LagTrackerWrite(XLogRecPtr lsn, TimestampTz local_flush_time); static TimeOffset LagTrackerRead(int head, XLogRecPtr lsn, TimestampTz now); -static bool TransactionIdInRecentPast(TransactionId xid, uint32 epoch); static void WalSndSegmentOpen(XLogReaderState *state, XLogSegNo nextSegNo, TimeLineID *tli_p); @@ -304,7 +303,7 @@ InitWalSender(void) */ if (MyDatabaseId == InvalidOid) { - Assert(MyProc->xmin == InvalidTransactionId); + Assert(pg_atomic_read_u64(&MyProc->xmin) == InvalidTransactionId); LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); MyProc->statusFlags |= PROC_AFFECTS_ALL_HORIZONS; ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; @@ -2471,7 +2470,7 @@ PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin, TransactionId feedbac ReplicationSlot *slot = MyReplicationSlot; SpinLockAcquire(&slot->mutex); - MyProc->xmin = InvalidTransactionId; + pg_atomic_write_u64(&MyProc->xmin, InvalidTransactionId); /* * For physical replication we don't need the interlock provided by xmin @@ -2503,44 +2502,6 @@ 
PhysicalReplicationSlotNewXmin(TransactionId feedbackXmin, TransactionId feedbac } } -/* - * Check that the provided xmin/epoch are sane, that is, not in the future - * and not so far back as to be already wrapped around. - * - * Epoch of nextXid should be same as standby, or if the counter has - * wrapped, then one greater than standby. - * - * This check doesn't care about whether clog exists for these xids - * at all. - */ -static bool -TransactionIdInRecentPast(TransactionId xid, uint32 epoch) -{ - FullTransactionId nextFullXid; - TransactionId nextXid; - uint32 nextEpoch; - - nextFullXid = ReadNextFullTransactionId(); - nextXid = XidFromFullTransactionId(nextFullXid); - nextEpoch = EpochFromFullTransactionId(nextFullXid); - - if (xid <= nextXid) - { - if (epoch != nextEpoch) - return false; - } - else - { - if (epoch + 1 != nextEpoch) - return false; - } - - if (!TransactionIdPrecedesOrEquals(xid, nextXid)) - return false; /* epoch OK, but it's wrapped around */ - - return true; -} - /* * Hot Standby feedback */ @@ -2548,9 +2509,7 @@ static void ProcessStandbyHSFeedbackMessage(void) { TransactionId feedbackXmin; - uint32 feedbackEpoch; TransactionId feedbackCatalogXmin; - uint32 feedbackCatalogEpoch; TimestampTz replyTime; /* @@ -2559,10 +2518,8 @@ ProcessStandbyHSFeedbackMessage(void) * of this message. 
*/ replyTime = pq_getmsgint64(&reply_message); - feedbackXmin = pq_getmsgint(&reply_message, 4); - feedbackEpoch = pq_getmsgint(&reply_message, 4); - feedbackCatalogXmin = pq_getmsgint(&reply_message, 4); - feedbackCatalogEpoch = pq_getmsgint(&reply_message, 4); + feedbackXmin = pq_getmsgint64(&reply_message); + feedbackCatalogXmin = pq_getmsgint64(&reply_message); if (message_level_is_interesting(DEBUG2)) { @@ -2571,11 +2528,9 @@ ProcessStandbyHSFeedbackMessage(void) /* Copy because timestamptz_to_str returns a static buffer */ replyTimeStr = pstrdup(timestamptz_to_str(replyTime)); - elog(DEBUG2, "hot standby feedback xmin %llu epoch %u, catalog_xmin %llu epoch %u reply_time %s", + elog(DEBUG2, "hot standby feedback xmin %llu, catalog_xmin %llu reply_time %s", (unsigned long long) feedbackXmin, - feedbackEpoch, (unsigned long long) feedbackCatalogXmin, - feedbackCatalogEpoch, replyTimeStr); pfree(replyTimeStr); @@ -2600,24 +2555,12 @@ ProcessStandbyHSFeedbackMessage(void) if (!TransactionIdIsNormal(feedbackXmin) && !TransactionIdIsNormal(feedbackCatalogXmin)) { - MyProc->xmin = InvalidTransactionId; + pg_atomic_write_u64(&MyProc->xmin, InvalidTransactionId); if (MyReplicationSlot != NULL) PhysicalReplicationSlotNewXmin(feedbackXmin, feedbackCatalogXmin); return; } - /* - * Check that the provided xmin/epoch are sane, that is, not in the future - * and not so far back as to be already wrapped around. Ignore if not. 
- */ - if (TransactionIdIsNormal(feedbackXmin) && - !TransactionIdInRecentPast(feedbackXmin, feedbackEpoch)) - return; - - if (TransactionIdIsNormal(feedbackCatalogXmin) && - !TransactionIdInRecentPast(feedbackCatalogXmin, feedbackCatalogEpoch)) - return; - /* * Set the WalSender's xmin equal to the standby's requested xmin, so that * the xmin will be taken into account by GetSnapshotData() / @@ -2655,9 +2598,9 @@ ProcessStandbyHSFeedbackMessage(void) { if (TransactionIdIsNormal(feedbackCatalogXmin) && TransactionIdPrecedes(feedbackCatalogXmin, feedbackXmin)) - MyProc->xmin = feedbackCatalogXmin; + pg_atomic_write_u64(&MyProc->xmin, feedbackCatalogXmin); else - MyProc->xmin = feedbackXmin; + pg_atomic_write_u64(&MyProc->xmin, feedbackXmin); } } diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index a8b63ec0884..bdde68ce24e 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -2451,6 +2451,7 @@ statext_expressions_load(Oid stxoid, bool inh, int idx) ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; tmptup.t_data = td; + HeapTupleCopyHeaderXids(&tmptup); tup = heap_copytuple(&tmptup); diff --git a/src/backend/storage/buffer/Makefile b/src/backend/storage/buffer/Makefile index fd7c40dcb08..ffcc0fc290e 100644 --- a/src/backend/storage/buffer/Makefile +++ b/src/backend/storage/buffer/Makefile @@ -17,6 +17,7 @@ OBJS = \ buf_table.o \ bufmgr.o \ freelist.o \ - localbuf.o + localbuf.o \ + heap_convert.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 323382dcfa8..d244fbf20a8 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1602,6 +1602,30 @@ WaitReadBuffers(ReadBuffersOperation *operation) relpath(operation->smgr->smgr_rlocator, forknum).str))); } + if (PageGetPageLayoutVersion(bufBlock) != PG_PAGE_LAYOUT_VERSION && + 
!PageIsNew((Page) bufBlock)) + { + Buffer buf = BufferDescriptorGetBuffer(bufHdr); + + /* + * All the forks but MAIN_FORKNUM should be converted to the + * actual page layout version in pg_upgrade. + */ + if (forknum != MAIN_FORKNUM) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid fork type (%d) in block %u of relation %s", + forknum, blocknum, + relpath(operation->smgr->smgr_rlocator, forknum).str))); + + LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE); + /* Check for no concurrent changes */ + if (PageGetPageLayoutVersion(bufBlock) != PG_PAGE_LAYOUT_VERSION) + convert_page(operation->rel, buf, blocknum); + + LWLockRelease(BufferDescriptorGetContentLock(bufHdr)); + } + /* Set BM_VALID, terminate IO, and wake up any waiters */ if (persistence == RELPERSISTENCE_TEMP) TerminateLocalBufferIO(bufHdr, false, BM_VALID); @@ -5110,6 +5134,64 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std) } } +/* + * Mark buffer as converted - i.e., its format is changed without logical changes. + * + * It will override `full_page_writes` GUC setting in XLogRecordAssemble. 
+ */ +void +MarkBufferConverted(Buffer buffer, bool converted) +{ + BufferDesc *bufHdr; + uint32 buf_state; + bool has_mark; + + if (!BufferIsValid(buffer)) + elog(ERROR, "bad buffer ID: %d", buffer); + + Assert(!BufferIsLocal(buffer)); + + bufHdr = GetBufferDescriptor(buffer - 1); + + Assert(GetPrivateRefCount(buffer) > 0); + if (converted) + { + /* here, either share or exclusive lock is OK */ + Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr))); + } + + buf_state = pg_atomic_read_u32(&bufHdr->state); + has_mark = (buf_state & BM_CONVERTED) != 0; + if (converted == has_mark) + return; + + buf_state = LockBufHdr(bufHdr); + buf_state &= ~BM_CONVERTED; + if (converted) + buf_state |= BM_CONVERTED; + UnlockBufHdr(bufHdr, buf_state); +} + +bool +IsBufferConverted(Buffer buffer) +{ + + BufferDesc *bufHdr; + uint32 buf_state; + + if (!BufferIsValid(buffer)) + elog(ERROR, "bad buffer ID: %d", buffer); + + Assert(!BufferIsLocal(buffer)); + + bufHdr = GetBufferDescriptor(buffer - 1); + + Assert(GetPrivateRefCount(buffer) > 0); + + buf_state = pg_atomic_read_u32(&bufHdr->state); + return (buf_state & BM_CONVERTED) != 0; +} + /* * Release buffer content locks for shared buffers. * @@ -5144,6 +5226,47 @@ UnlockBuffers(void) } } +/* + * Is the shared buffer locked? + */ +bool +IsBufferLocked(Buffer buffer) +{ + BufferDesc *buf; + + if (buffer == InvalidBuffer) + return true; + + Assert(BufferIsPinned(buffer)); + if (BufferIsLocal(buffer)) + return true; /* local buffers need no lock */ + + buf = GetBufferDescriptor(buffer - 1); + + return LWLockHeldByMe(BufferDescriptorGetContentLock(buf)); +} + +/* + * Is the shared buffer locked exclusively? 
+ */ +bool +IsBufferLockedExclusive(Buffer buffer) +{ + BufferDesc *buf; + + if (buffer == InvalidBuffer) + return true; + + Assert(BufferIsPinned(buffer)); + if (BufferIsLocal(buffer)) + return true; /* local buffers need no lock */ + + buf = GetBufferDescriptor(buffer - 1); + + return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(buf), + LW_EXCLUSIVE); +} + /* * Acquire or release the content_lock for the buffer. */ diff --git a/src/backend/storage/buffer/heap_convert.c b/src/backend/storage/buffer/heap_convert.c new file mode 100644 index 00000000000..9adf84f8874 --- /dev/null +++ b/src/backend/storage/buffer/heap_convert.c @@ -0,0 +1,552 @@ +/*------------------------------------------------------------------------- + * + * heap_convert.c + * Heap page converter from 32bit to 64bit xid format + * + * Copyright (c) 2015-2022, Postgres Professional + * Copyright (c) 2024, Tantor Labs + * + * IDENTIFICATION + * src/backend/storage/buffer/heap_convert.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/generic_xlog.h" +#include "access/heapam.h" +#include "access/multixact.h" +#include "catalog/catalog.h" +#include "storage/bufmgr.h" +#include "storage/checksum.h" + +static void repack_heap_tuples(Relation rel, Page page, Buffer buf, + BlockNumber blkno, bool double_xmax); + +/* + * itemoffcompare + * Sorting support for repack_tuples() + */ +int +itemoffcompare(const void *item1, const void *item2) +{ + /* Sort in decreasing itemoff order */ + return ((ItemIdCompactData *) item2)->itemoff - + ((ItemIdCompactData *) item1)->itemoff; +} + +/* + * Lazy page conversion from 32-bit to 64-bit XID at first read. 
+ */ +void +convert_page(Relation rel, Buffer buf, BlockNumber blkno) +{ + static unsigned logcnt = 0; + bool logit; + PageHeader hdr; + GenericXLogState *state = NULL; + uint16 checksum; + bool try_double_xmax; + Page page = BufferGetPage(buf); + hdr = (PageHeader) page; + + /* Not during XLog replaying */ + Assert(rel != NULL); + + /* Verify checksum */ + if (hdr->pd_checksum) + { + checksum = pg_checksum_page((char *) page, blkno); + if (checksum != hdr->pd_checksum) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("page verification failed, calculated checksum %u but expected %u", + checksum, hdr->pd_checksum))); + } + + /* + * We occasionally force logging of page conversion, so never-changed + * pages are converted in the end. FORCE_LOG_EVERY is chosen arbitrarily + * to log neither too much nor too little. + */ +#define FORCE_LOG_EVERY 128 + logit = !RecoveryInProgress() && XLogIsNeeded() && RelationNeedsWAL(rel); + logit = logit && (++logcnt % FORCE_LOG_EVERY) == 0; + if (logit) + { + state = GenericXLogStart(rel); + page = GenericXLogRegisterBuffer(state, buf, + GENERIC_XLOG_FULL_IMAGE); + hdr = (PageHeader) page; + } +#ifdef USE_ASSERT_CHECKING + else + { + /* Not already converted */ + Assert(PageGetPageLayoutVersion(page) != PG_PAGE_LAYOUT_VERSION); + /* Page in 32-bit xid format should not have PageSpecial. 
*/ + Assert(PageGetSpecialSize(page) == 0); + } +#endif + + switch (rel->rd_rel->relkind) + { + case 't': + try_double_xmax = hdr->pd_upper - hdr->pd_lower < + MAXALIGN(sizeof(ToastPageSpecialData)); + repack_heap_tuples(rel, page, buf, blkno, try_double_xmax); + break; + case 'r': + case 'p': + case 'm': + try_double_xmax = hdr->pd_upper - hdr->pd_lower < + MAXALIGN(sizeof(HeapPageSpecialData)); + repack_heap_tuples(rel, page, buf, blkno, try_double_xmax); + break; + case 'i': + /* no need to convert index */ + case 'S': + /* no real need to convert sequences */ + break; + default: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("conversion for relation \"%s\" cannot be done", + RelationGetRelationName(rel)), + errdetail_relkind_not_supported(rel->rd_rel->relkind))); + } + + hdr->pd_checksum = pg_checksum_page((char *) page, blkno); + + PageSetPageSizeAndVersion(page, PageGetPageSize(page), + PG_PAGE_LAYOUT_VERSION); + + if (logit) + { + /* + * Finish logging buffer conversion and mark buffer as dirty. + */ + Assert(state != NULL); + MarkBufferDirty(buf); + GenericXLogFinish(state); + } + else + { + /* + * Otherwise, it will be logged with full-page-write record on first + * actual change. + */ + MarkBufferConverted(buf, true); + } +} + +/* + * Convert xmin and xmax in a tuple. + * This also considers special cases: "double xmax" page format and multixact + * in xmax. 
+ */ +static void +convert_heap_tuple_xids(HeapTupleHeader tuple, TransactionId xid_base, + MultiXactId multi_base, bool double_xmax) +{ + /* Convert xmin */ + if (double_xmax) + { + /* Prepare tuple for "double xmax" page format */ + tuple->t_infomask |= HEAP_XMIN_FROZEN; + tuple->t_choice.t_heap.t_xmin = 0; + } + else + { + TransactionId xmin = tuple->t_choice.t_heap.t_xmin; + + if (TransactionIdIsNormal(xmin)) + { + if (HeapTupleHeaderXminFrozen(tuple)) + tuple->t_choice.t_heap.t_xmin = FrozenTransactionId; + else if (HeapTupleHeaderXminInvalid(tuple)) + tuple->t_choice.t_heap.t_xmin = InvalidTransactionId; + else + { + Assert(xmin >= xid_base + FirstNormalTransactionId); + /* Subtract xid_base from normal xmin */ + tuple->t_choice.t_heap.t_xmin = xmin - xid_base; + } + } + } + + /* If tuple has multixact flag, handle mxid wraparound */ + if ((tuple->t_infomask & HEAP_XMAX_IS_MULTI) && + !(tuple->t_infomask & HEAP_XMAX_INVALID)) + { + MultiXactId mxid = tuple->t_choice.t_heap.t_xmax; + + /* Handle mxid wraparound */ + if (mxid < multi_base) + { + mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId; + Assert(mxid >= multi_base); + } + + if (double_xmax) + { + /* Save converted mxid into "double xmax" format */ + HeapTupleHeaderSetDoubleXmax(tuple, mxid); + } + else + { + /* + * Save converted mxid offset relative to (minmxid - 1), which + * will be page's mxid base. 
+ */ + Assert(mxid - multi_base + FirstMultiXactId <= PG_UINT32_MAX); + tuple->t_choice.t_heap.t_xmax = + (uint32) (mxid - multi_base + FirstMultiXactId); + } + } + /* Convert xmax */ + else if (!(tuple->t_infomask & HEAP_XMAX_INVALID)) + { + TransactionId xmax = tuple->t_choice.t_heap.t_xmax; + + if (double_xmax) + { + /* Save converted xmax into "double xmax" format */ + HeapTupleHeaderSetDoubleXmax(tuple, xmax); + } + else if (TransactionIdIsNormal(xmax)) + { + /* Subtract xid_base from normal xmax */ + Assert(xmax >= xid_base + FirstNormalTransactionId); + tuple->t_choice.t_heap.t_xmax = xmax - xid_base; + } + } + else + { + if (double_xmax) + HeapTupleHeaderSetDoubleXmax(tuple, InvalidTransactionId); + else + tuple->t_choice.t_heap.t_xmax = InvalidTransactionId; + } +} + +/* + * Correct page xmin/xmax based on tuple xmin/xmax values. + */ +static void +compute_xid_min_max(HeapTuple tuple, MultiXactId multi_base, + TransactionId *xid_min, TransactionId *xid_max, + MultiXactId *multi_min, MultiXactId *multi_max) +{ + /* xmin */ + if (!HeapTupleHeaderXminInvalid(tuple->t_data) && + !HeapTupleHeaderXminFrozen(tuple->t_data)) + { + TransactionId xid = HeapTupleGetRawXmin(tuple); + + if (TransactionIdIsNormal(xid)) + { + *xid_max = Max(*xid_max, xid); + *xid_min = Min(*xid_min, xid); + } + } + + /* xmax */ + if (!(tuple->t_data->t_infomask & HEAP_XMAX_INVALID)) + { + TransactionId xid; + + if (tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI) + { + MultiXactId mxid = HeapTupleGetRawXmax(tuple); + + Assert(MultiXactIdIsValid(mxid)); + + /* Handle mxid wraparound */ + if (mxid < multi_base) + { + mxid += ((MultiXactId) 1 << 32) - FirstMultiXactId; + Assert(mxid >= multi_base); + } + + *multi_max = Max(*multi_max, mxid); + *multi_min = Min(*multi_min, mxid); + + /* + * Also take into account hidden update xid, which can be + * extracted by the vacuum. 
+ */ + if (tuple->t_data->t_infomask & HEAP_XMAX_LOCK_ONLY) + xid = InvalidTransactionId; + else + xid = HeapTupleGetUpdateXid(tuple); + } + else + { + xid = HeapTupleGetRawXmax(tuple); + } + + if (TransactionIdIsNormal(xid)) + { + *xid_max = Max(*xid_max, xid); + *xid_min = Min(*xid_min, xid); + } + } +} + +/* + * Returns true if both: + * - xid_max: an upper boundary of xmin's and xmax'es of all tuples on a page + * - xid_min: a lower boundary of xmin's and xmax'es of all tuples on a page + * can be expressed by a 32-bit number relative to page's xid_base/multi_base + * or invalid. + * + * True value effectively means that these tuples can be directly put on one + * page in 64-xid format. + */ +static inline bool +xids_fit_page(TransactionId xid_min, TransactionId xid_max, + MultiXactId multi_min, MultiXactId multi_max) +{ + bool xid_max_fits = false; + bool multi_max_fits = false; + + if (xid_max == InvalidTransactionId) + xid_max_fits = true; + + if (xid_max - xid_min <= MaxShortTransactionId - FirstNormalTransactionId) + xid_max_fits = true; + + if (multi_max == InvalidMultiXactId) + multi_max_fits = true; + + if (multi_max - multi_min <= MaxShortTransactionId - FirstMultiXactId) + multi_max_fits = true; + + return xid_max_fits && multi_max_fits; +} + +/* + * Set "base" for page in 64-bit XID format. + * + * This should not be called for double xmax pages. They do not have room for + * page special. 
+ */ +static inline void +heap_page_set_base(Page page, + TransactionId xid_min, TransactionId xid_max, + MultiXactId multi_min, MultiXactId multi_max, + TransactionId *xid_base, MultiXactId *multi_base, + bool is_toast) +{ + PageHeader hdr = (PageHeader) page; + + if (xid_max != InvalidTransactionId) + *xid_base = xid_min - FirstNormalTransactionId; + else + *xid_base = InvalidTransactionId; + + if (multi_max != InvalidMultiXactId) + *multi_base = multi_min - FirstMultiXactId; + else + *multi_base = InvalidMultiXactId; + + if (is_toast) + { + ToastPageSpecial special; + + hdr->pd_special = BLCKSZ - MAXALIGN(sizeof(ToastPageSpecialData)); + special = ToastPageGetSpecial(page); + special->pd_xid_base = *xid_base; + } + else + { + HeapPageSpecial special; + + hdr->pd_special = BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData)); + special = HeapPageGetSpecial(page); + special->pd_xid_base = *xid_base; + special->pd_multi_base = *multi_base; + } +} + +/* + * repack_heap_tuples + * Convert heap page format reusing space of dead tuples + */ +static void +repack_heap_tuples(Relation rel, Page page, Buffer buf, BlockNumber blkno, + bool try_double_xmax) +{ + ItemIdCompactData items[MaxHeapTuplesPerPage]; + ItemIdCompact itemPtr = items; + int nitems = 0, + maxoff = PageGetMaxOffsetNumber(page), + idx, + occupied_space = 0; + Offset upper; + bool double_xmax, + special_fits, + toast; + PageHeader hdr = (PageHeader) page, + new_hdr; + PGAlignedBlock zerobuf = {0}; + Page new_page; + MultiXactId multi_base = rel->rd_rel->relminmxid, + multi_min = MaxMultiXactId, + multi_max = InvalidMultiXactId; + TransactionId xid_base = rel->rd_rel->relfrozenxid, + xid_min = MaxTransactionId, + xid_max = InvalidTransactionId; + + toast = IsToastRelation(rel); + + if (TransactionIdIsNormal(hdr->pd_prune_xid)) + xid_min = xid_max = hdr->pd_prune_xid; + + for (idx = 0; idx < maxoff; idx++) + { + HeapTupleData tuple; + ItemId lp; + + lp = PageGetItemId(page, idx + 1); + + /* Skip redirects and 
items without storage */ + if (!ItemIdHasStorage(lp)) + continue; + + /* Build in-memory tuple representation */ + tuple.t_tableOid = 1; /* doesn't matter in this case */ + tuple.t_data = (HeapTupleHeader) PageGetItem(page, lp); + HeapTupleCopyHeaderXids(&tuple); + tuple.t_len = ItemIdGetLength(lp); + ItemPointerSet(&(tuple.t_self), blkno, ItemIdGetOffset(lp)); + + /* + * This is only needed to determine whether tuple is HEAPTUPLE_DEAD or + * HEAPTUPLE_RECENTLY_DEAD. And since this is the first time we read + * page after pg_upgrade, it cannot be HEAPTUPLE_RECENTLY_DEAD. See + * HeapTupleSatisfiesVacuum() for details + */ + if (try_double_xmax && + HeapTupleSatisfiesVacuum(&tuple, + (TransactionId) 1 << 32, buf) == HEAPTUPLE_DEAD) + { + ItemIdSetDead(lp); + } + + if (ItemIdIsNormal(lp) && ItemIdHasStorage(lp)) + { + itemPtr->offsetindex = idx; + itemPtr->itemoff = ItemIdGetOffset(lp); + if (unlikely(itemPtr->itemoff < hdr->pd_upper || + itemPtr->itemoff >= hdr->pd_special)) + { + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("corrupted item pointer: %u", + itemPtr->itemoff))); + } + + itemPtr->alignedlen = MAXALIGN(ItemIdGetLength(lp)); + occupied_space += itemPtr->alignedlen; + nitems++; + itemPtr++; + if (try_double_xmax) + { + HeapTupleSetXmin(&tuple, FrozenTransactionId); + HeapTupleHeaderStoreXminFrozen(tuple.t_data); + } + + compute_xid_min_max(&tuple, multi_base, + &xid_min, &xid_max, + &multi_min, &multi_max); + } + } + + new_page = (Page) zerobuf.data; + MemSet(new_page, 0, BLCKSZ); + /* Write new header */ + new_hdr = (PageHeader) new_page; + *new_hdr = *hdr; + new_hdr->pd_lower = SizeOfPageHeaderData + maxoff * sizeof(ItemIdData); + + if (toast) + special_fits = BLCKSZ - new_hdr->pd_lower - occupied_space >= + sizeof(ToastPageSpecialData); + else + special_fits = BLCKSZ - new_hdr->pd_lower - occupied_space >= + sizeof(HeapPageSpecialData); + + double_xmax = !special_fits || + !xids_fit_page(xid_min, xid_max, multi_min, multi_max); + + if 
(!double_xmax) + { + Assert(xid_max == InvalidTransactionId || xid_max >= xid_min); + Assert(multi_max == InvalidMultiXactId || multi_max >= multi_min); + + heap_page_set_base(new_page, + xid_min, xid_max, + multi_min, multi_max, + &xid_base, &multi_base, + toast); + + HeapPageSetPruneXid(new_page, new_hdr->pd_prune_xid, toast); + } + else + { + /* No space for special area, switch to "double xmax" format */ + elog(DEBUG2, "convert heap page %u of relation \"%s\" to double xmax format", + blkno, RelationGetRelationName(rel)); + + if (try_double_xmax) + { + xid_base = InvalidTransactionId; + multi_base = InvalidMultiXactId; + } + else + { + repack_heap_tuples(rel, page, buf, blkno, true); + return; + } + } + + /* Copy ItemIds with an offset */ + memcpy((char *) new_page + SizeOfPageHeaderData, + (char *) page + SizeOfPageHeaderData, + hdr->pd_lower - SizeOfPageHeaderData); + + /* Move live tuples */ + upper = new_hdr->pd_special; + for (idx = 0; idx < nitems; idx++) + { + HeapTupleHeader tuple; + ItemId lp; + + itemPtr = &items[idx]; + lp = PageGetItemId(new_page, itemPtr->offsetindex + 1); + upper -= itemPtr->alignedlen; + occupied_space -= itemPtr->alignedlen; + + memcpy((char *) new_page + upper, + (char *) page + itemPtr->itemoff, + itemPtr->alignedlen); + + tuple = (HeapTupleHeader) (((char *) new_page) + upper); + + convert_heap_tuple_xids(tuple, xid_base, multi_base, double_xmax); + + lp->lp_off = upper; + } + + Assert(occupied_space == 0); + + new_hdr->pd_upper = upper; + if (new_hdr->pd_lower > new_hdr->pd_upper) + elog(ERROR, "cannot convert block %u of relation \"%s\"", + blkno, RelationGetRelationName(rel)); + + memcpy(page, new_page, BLCKSZ); +} diff --git a/src/backend/storage/buffer/meson.build b/src/backend/storage/buffer/meson.build index 448976d2400..3b4d32e4dc5 100644 --- a/src/backend/storage/buffer/meson.build +++ b/src/backend/storage/buffer/meson.build @@ -5,5 +5,6 @@ backend_sources += files( 'buf_table.c', 'bufmgr.c', 'freelist.c', + 
'heap_convert.c', 'localbuf.c', ) diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 01b32afa50e..7e2fe7d41c4 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -65,7 +65,7 @@ #include "utils/rel.h" #include "utils/snapmgr.h" -#define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var)))) +#define UINT64_ACCESS_ONCE(var) ((uint64)(*((volatile uint64 *)&(var)))) /* Our shared memory area */ typedef struct ProcArrayStruct @@ -365,8 +365,6 @@ static void ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid); static void MaintainLatestCompletedXid(TransactionId latestXid); static void MaintainLatestCompletedXidRecovery(TransactionId latestXid); -static inline FullTransactionId FullXidRelativeTo(FullTransactionId rel, - TransactionId xid); static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons); /* @@ -525,7 +523,8 @@ ProcArrayAdd(PGPROC *proc) arrayP->pgprocnos[index] = GetNumberFromPGProc(proc); proc->pgxactoff = index; - ProcGlobal->xids[index] = proc->xid; + pg_atomic_write_u64(&ProcGlobal->xids[index], + pg_atomic_read_u64(&proc->xid)); ProcGlobal->subxidStates[index] = proc->subxidStatus; ProcGlobal->statusFlags[index] = proc->statusFlags; @@ -585,7 +584,7 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid) if (TransactionIdIsValid(latestXid)) { - Assert(TransactionIdIsValid(ProcGlobal->xids[myoff])); + Assert(TransactionIdIsValid(pg_atomic_read_u64(&ProcGlobal->xids[myoff]))); /* Advance global latestCompletedXid while holding the lock */ MaintainLatestCompletedXid(latestXid); @@ -593,17 +592,17 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid) /* Same with xactCompletionCount */ TransamVariables->xactCompletionCount++; - ProcGlobal->xids[myoff] = InvalidTransactionId; + pg_atomic_write_u64(&ProcGlobal->xids[myoff], InvalidTransactionId); ProcGlobal->subxidStates[myoff].overflowed = false; ProcGlobal->subxidStates[myoff].count = 0; } 
else { /* Shouldn't be trying to remove a live transaction here */ - Assert(!TransactionIdIsValid(ProcGlobal->xids[myoff])); + Assert(!TransactionIdIsValid(pg_atomic_read_u64(&(ProcGlobal->xids[myoff])))); } - Assert(!TransactionIdIsValid(ProcGlobal->xids[myoff])); + Assert(!TransactionIdIsValid(pg_atomic_read_u64(&(ProcGlobal->xids[myoff])))); Assert(ProcGlobal->subxidStates[myoff].count == 0); Assert(ProcGlobal->subxidStates[myoff].overflowed == false); @@ -649,7 +648,6 @@ ProcArrayRemove(PGPROC *proc, TransactionId latestXid) LWLockRelease(ProcArrayLock); } - /* * ProcArrayEndTransaction -- mark a transaction as no longer running * @@ -674,7 +672,7 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) * else is taking a snapshot. See discussion in * src/backend/access/transam/README. */ - Assert(TransactionIdIsValid(proc->xid)); + Assert(TransactionIdIsValid(pg_atomic_read_u64(&proc->xid))); /* * If we can immediately acquire ProcArrayLock, we clear our own XID @@ -696,12 +694,12 @@ ProcArrayEndTransaction(PGPROC *proc, TransactionId latestXid) * anyone else's calculation of a snapshot. We might change their * estimate of global xmin, but that's OK. */ - Assert(!TransactionIdIsValid(proc->xid)); + Assert(!TransactionIdIsValid(pg_atomic_read_u64(&proc->xid))); Assert(proc->subxidStatus.count == 0); Assert(!proc->subxidStatus.overflowed); proc->vxid.lxid = InvalidLocalTransactionId; - proc->xmin = InvalidTransactionId; + pg_atomic_write_u64(&proc->xmin, InvalidTransactionId); /* be sure this is cleared in abort */ proc->delayChkptFlags = 0; @@ -737,13 +735,14 @@ ProcArrayEndTransactionInternal(PGPROC *proc, TransactionId latestXid) * processes' PGPROC entries. 
*/ Assert(LWLockHeldByMeInMode(ProcArrayLock, LW_EXCLUSIVE)); - Assert(TransactionIdIsValid(ProcGlobal->xids[pgxactoff])); - Assert(ProcGlobal->xids[pgxactoff] == proc->xid); + Assert(TransactionIdIsValid(pg_atomic_read_u64(&ProcGlobal->xids[pgxactoff]))); + Assert(pg_atomic_read_u64(&ProcGlobal->xids[pgxactoff]) == + pg_atomic_read_u64(&proc->xid)); - ProcGlobal->xids[pgxactoff] = InvalidTransactionId; - proc->xid = InvalidTransactionId; + pg_atomic_write_u64(&ProcGlobal->xids[pgxactoff], InvalidTransactionId); + pg_atomic_write_u64(&proc->xid, InvalidTransactionId); proc->vxid.lxid = InvalidLocalTransactionId; - proc->xmin = InvalidTransactionId; + pg_atomic_write_u64(&proc->xmin, InvalidTransactionId); /* be sure this is cleared in abort */ proc->delayChkptFlags = 0; @@ -797,7 +796,7 @@ ProcArrayGroupClearXid(PGPROC *proc, TransactionId latestXid) uint32 wakeidx; /* We should definitely have an XID to clear. */ - Assert(TransactionIdIsValid(proc->xid)); + Assert(TransactionIdIsValid(pg_atomic_read_u64(&proc->xid))); /* Add ourselves to the list of processes needing a group XID clear. 
*/ proc->procArrayGroupMember = true; @@ -926,11 +925,11 @@ ProcArrayClearTransaction(PGPROC *proc) pgxactoff = proc->pgxactoff; - ProcGlobal->xids[pgxactoff] = InvalidTransactionId; - proc->xid = InvalidTransactionId; + pg_atomic_write_u64(&ProcGlobal->xids[pgxactoff], InvalidTransactionId); + pg_atomic_write_u64(&proc->xid, InvalidTransactionId); proc->vxid.lxid = InvalidLocalTransactionId; - proc->xmin = InvalidTransactionId; + pg_atomic_write_u64(&proc->xmin, InvalidTransactionId); proc->recoveryConflictPending = false; Assert(!(proc->statusFlags & PROC_VACUUM_STATE_MASK)); @@ -974,8 +973,7 @@ MaintainLatestCompletedXid(TransactionId latestXid) if (TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid)) { - TransamVariables->latestCompletedXid = - FullXidRelativeTo(cur_latest, latestXid); + TransamVariables->latestCompletedXid = FullTransactionIdFromXid(latestXid); } Assert(IsBootstrapProcessingMode() || @@ -989,7 +987,6 @@ static void MaintainLatestCompletedXidRecovery(TransactionId latestXid) { FullTransactionId cur_latest = TransamVariables->latestCompletedXid; - FullTransactionId rel; Assert(AmStartupProcess() || !IsUnderPostmaster); Assert(LWLockHeldByMe(ProcArrayLock)); @@ -999,14 +996,12 @@ MaintainLatestCompletedXidRecovery(TransactionId latestXid) * latestCompletedXid to be initialized in recovery. But in recovery it's * safe to access nextXid without a lock for the startup process. 
*/ - rel = TransamVariables->nextXid; Assert(FullTransactionIdIsValid(TransamVariables->nextXid)); if (!FullTransactionIdIsValid(cur_latest) || TransactionIdPrecedes(XidFromFullTransactionId(cur_latest), latestXid)) { - TransamVariables->latestCompletedXid = - FullXidRelativeTo(rel, latestXid); + TransamVariables->latestCompletedXid = FullTransactionIdFromXid(latestXid); } Assert(FullTransactionIdIsNormal(TransamVariables->latestCompletedXid)); @@ -1402,7 +1397,7 @@ bool TransactionIdIsInProgress(TransactionId xid) { static TransactionId *xids = NULL; - static TransactionId *other_xids; + static pg_atomic_uint64 *other_xids; XidCacheStatus *other_subxidstates; int nxids = 0; ProcArrayStruct *arrayP = procArray; @@ -1498,7 +1493,7 @@ TransactionIdIsInProgress(TransactionId xid) continue; /* Fetch xid just once - see GetNewTransactionId */ - pxid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]); + pxid = pg_atomic_read_u64(&(other_xids[pgxactoff])); if (!TransactionIdIsValid(pxid)) continue; @@ -1530,7 +1525,7 @@ TransactionIdIsInProgress(TransactionId xid) for (j = pxids - 1; j >= 0; j--) { /* Fetch xid just once - see GetNewTransactionId */ - TransactionId cxid = UINT32_ACCESS_ONCE(proc->subxids.xids[j]); + TransactionId cxid = UINT64_ACCESS_ONCE(proc->subxids.xids[j]); if (TransactionIdEquals(cxid, xid)) { @@ -1615,7 +1610,7 @@ TransactionIdIsInProgress(TransactionId xid) topxid = SubTransGetTopmostTransaction(xid); Assert(TransactionIdIsValid(topxid)); if (!TransactionIdEquals(topxid, xid) && - pg_lfind32(topxid, xids, nxids)) + pg_lfind64(topxid, xids, nxids)) return true; cachedXidIsNotInProgress = xid; @@ -1635,7 +1630,7 @@ TransactionIdIsActive(TransactionId xid) { bool result = false; ProcArrayStruct *arrayP = procArray; - TransactionId *other_xids = ProcGlobal->xids; + pg_atomic_uint64 *other_xids = ProcGlobal->xids; int i; /* @@ -1654,7 +1649,7 @@ TransactionIdIsActive(TransactionId xid) TransactionId pxid; /* Fetch xid just once - see GetNewTransactionId */ - 
pxid = UINT32_ACCESS_ONCE(other_xids[i]); + pxid = pg_atomic_read_u64(&(other_xids[i])); if (!TransactionIdIsValid(pxid)) continue; @@ -1737,7 +1732,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) ProcArrayStruct *arrayP = procArray; TransactionId kaxmin; bool in_recovery = RecoveryInProgress(); - TransactionId *other_xids = ProcGlobal->xids; + pg_atomic_uint64 *other_xids = ProcGlobal->xids; /* inferred after ProcArrayLock is released */ h->catalog_oldest_nonremovable = InvalidTransactionId; @@ -1753,7 +1748,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) * additions. */ { - TransactionId initial; + TransactionId initial, + xid; initial = XidFromFullTransactionId(h->latest_completed); Assert(TransactionIdIsValid(initial)); @@ -1775,8 +1771,9 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) * definition, can't be any newer changes in the temp table than * latestCompletedXid. */ - if (TransactionIdIsValid(MyProc->xid)) - h->temp_oldest_nonremovable = MyProc->xid; + xid = pg_atomic_read_u64(&MyProc->xid); + if (TransactionIdIsValid(xid)) + h->temp_oldest_nonremovable = xid; else h->temp_oldest_nonremovable = initial; } @@ -1798,8 +1795,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h) TransactionId xmin; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); - xmin = UINT32_ACCESS_ONCE(proc->xmin); + xid = pg_atomic_read_u64(&(other_xids[index])); + xmin = pg_atomic_read_u64(&proc->xmin); /* * Consider both the transaction's Xmin, and its Xid. @@ -2125,8 +2122,8 @@ GetSnapshotDataReuse(Snapshot snapshot) * requirement that concurrent GetSnapshotData() calls yield the same * xmin. 
*/ - if (!TransactionIdIsValid(MyProc->xmin)) - MyProc->xmin = TransactionXmin = snapshot->xmin; + if (!TransactionIdIsValid(pg_atomic_read_u64(&MyProc->xmin))) + pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = snapshot->xmin); RecentXmin = snapshot->xmin; Assert(TransactionIdPrecedesOrEquals(TransactionXmin, RecentXmin)); @@ -2175,7 +2172,7 @@ Snapshot GetSnapshotData(Snapshot snapshot) { ProcArrayStruct *arrayP = procArray; - TransactionId *other_xids = ProcGlobal->xids; + pg_atomic_uint64 *other_xids = ProcGlobal->xids; TransactionId xmin; TransactionId xmax; int count = 0; @@ -2238,8 +2235,8 @@ GetSnapshotData(Snapshot snapshot) latest_completed = TransamVariables->latestCompletedXid; mypgxactoff = MyProc->pgxactoff; - myxid = other_xids[mypgxactoff]; - Assert(myxid == MyProc->xid); + myxid = pg_atomic_read_u64(&other_xids[mypgxactoff]); + Assert(myxid == pg_atomic_read_u64(&MyProc->xid)); oldestxid = TransamVariables->oldestXid; curXactCompletionCount = TransamVariables->xactCompletionCount; @@ -2273,7 +2270,7 @@ GetSnapshotData(Snapshot snapshot) for (int pgxactoff = 0; pgxactoff < numProcs; pgxactoff++) { /* Fetch xid just once - see GetNewTransactionId */ - TransactionId xid = UINT32_ACCESS_ONCE(other_xids[pgxactoff]); + TransactionId xid = pg_atomic_read_u64(&(other_xids[pgxactoff])); uint8 statusFlags; Assert(allProcs[arrayP->pgprocnos[pgxactoff]].pgxactoff == pgxactoff); @@ -2410,8 +2407,8 @@ GetSnapshotData(Snapshot snapshot) replication_slot_xmin = procArray->replication_slot_xmin; replication_slot_catalog_xmin = procArray->replication_slot_catalog_xmin; - if (!TransactionIdIsValid(MyProc->xmin)) - MyProc->xmin = TransactionXmin = xmin; + if (!TransactionIdIsValid(pg_atomic_read_u64(&MyProc->xmin))) + pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = xmin); LWLockRelease(ProcArrayLock); @@ -2423,12 +2420,7 @@ GetSnapshotData(Snapshot snapshot) FullTransactionId def_vis_fxid_data; FullTransactionId oldestfxid; - /* - * Converting oldestXid is 
only safe when xid horizon cannot advance, - * i.e. holding locks. While we don't hold the lock anymore, all the - * necessary data has been gathered with lock held. - */ - oldestfxid = FullXidRelativeTo(latest_completed, oldestxid); + oldestfxid = FullTransactionIdFromXid(oldestxid); /* Check whether there's a replication slot requiring an older xmin. */ def_vis_xid_data = @@ -2447,8 +2439,8 @@ GetSnapshotData(Snapshot snapshot) def_vis_xid = TransactionIdOlder(replication_slot_catalog_xmin, def_vis_xid); - def_vis_fxid = FullXidRelativeTo(latest_completed, def_vis_xid); - def_vis_fxid_data = FullXidRelativeTo(latest_completed, def_vis_xid_data); + def_vis_fxid = FullTransactionIdFromXid(def_vis_xid); + def_vis_fxid_data = FullTransactionIdFromXid(def_vis_xid_data); /* * Check if we can increase upper bound. As a previous @@ -2467,7 +2459,7 @@ GetSnapshotData(Snapshot snapshot) /* See temp_oldest_nonremovable computation in ComputeXidHorizons() */ if (TransactionIdIsNormal(myxid)) GlobalVisTempRels.definitely_needed = - FullXidRelativeTo(latest_completed, myxid); + FullTransactionIdFromXid(myxid); else { GlobalVisTempRels.definitely_needed = latest_completed; @@ -2577,7 +2569,7 @@ ProcArrayInstallImportedXmin(TransactionId xmin, /* * Likewise, let's just make real sure its xmin does cover us. */ - xid = UINT32_ACCESS_ONCE(proc->xmin); + xid = pg_atomic_read_u64(&proc->xmin); if (!TransactionIdIsNormal(xid) || !TransactionIdPrecedesOrEquals(xid, xmin)) continue; @@ -2588,7 +2580,7 @@ ProcArrayInstallImportedXmin(TransactionId xmin, * GetSnapshotData first, we'll be overwriting a valid xmin here, so * we don't check that.) */ - MyProc->xmin = TransactionXmin = xmin; + pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = xmin); result = true; break; @@ -2632,7 +2624,7 @@ ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc) * can't go backwards. Also, make sure it's running in the same database, * so that the per-database xmin cannot go backwards. 
*/ - xid = UINT32_ACCESS_ONCE(proc->xmin); + xid = pg_atomic_read_u64(&proc->xmin); if (proc->databaseId == MyDatabaseId && TransactionIdIsNormal(xid) && TransactionIdPrecedesOrEquals(xid, xmin)) @@ -2641,7 +2633,7 @@ ProcArrayInstallRestoredXmin(TransactionId xmin, PGPROC *proc) * Install xmin and propagate the statusFlags that affect how the * value is interpreted by vacuum. */ - MyProc->xmin = TransactionXmin = xmin; + pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = xmin); MyProc->statusFlags = (MyProc->statusFlags & ~PROC_XMIN_FLAGS) | (proc->statusFlags & PROC_XMIN_FLAGS); ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; @@ -2692,7 +2684,7 @@ GetRunningTransactionData(void) static RunningTransactionsData CurrentRunningXactsData; ProcArrayStruct *arrayP = procArray; - TransactionId *other_xids = ProcGlobal->xids; + pg_atomic_uint64 *other_xids = ProcGlobal->xids; RunningTransactions CurrentRunningXacts = &CurrentRunningXactsData; TransactionId latestCompletedXid; TransactionId oldestRunningXid; @@ -2752,7 +2744,7 @@ GetRunningTransactionData(void) TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); + xid = pg_atomic_read_u64(&(other_xids[index])); /* * We don't need to store transactions that don't have a TransactionId @@ -2880,7 +2872,7 @@ TransactionId GetOldestActiveTransactionId(void) { ProcArrayStruct *arrayP = procArray; - TransactionId *other_xids = ProcGlobal->xids; + pg_atomic_uint64 *other_xids = ProcGlobal->xids; TransactionId oldestRunningXid; int index; @@ -2906,7 +2898,7 @@ GetOldestActiveTransactionId(void) TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); + xid = pg_atomic_read_u64(&(other_xids[index])); if (!TransactionIdIsNormal(xid)) continue; @@ -2994,7 +2986,7 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly) */ if (!recovery_in_progress) { - TransactionId *other_xids = 
ProcGlobal->xids; + pg_atomic_uint64 *other_xids = ProcGlobal->xids; /* * Spin over procArray collecting min(ProcGlobal->xids[i]) @@ -3004,7 +2996,7 @@ GetOldestSafeDecodingTransactionId(bool catalogOnly) TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */ - xid = UINT32_ACCESS_ONCE(other_xids[index]); + xid = pg_atomic_read_u64(&(other_xids[index])); if (!TransactionIdIsNormal(xid)) continue; @@ -3176,8 +3168,8 @@ ProcNumberGetTransactionIds(ProcNumber procNumber, TransactionId *xid, if (proc->pid != 0) { - *xid = proc->xid; - *xmin = proc->xmin; + *xid = pg_atomic_read_u64(&proc->xid); + *xmin = pg_atomic_read_u64(&proc->xmin); *nsubxid = proc->subxidStatus.count; *overflowed = proc->subxidStatus.overflowed; } @@ -3257,7 +3249,7 @@ BackendXidGetPid(TransactionId xid) { int result = 0; ProcArrayStruct *arrayP = procArray; - TransactionId *other_xids = ProcGlobal->xids; + pg_atomic_uint64 *other_xids = ProcGlobal->xids; int index; if (xid == InvalidTransactionId) /* never match invalid xid */ @@ -3267,7 +3259,7 @@ BackendXidGetPid(TransactionId xid) for (index = 0; index < arrayP->numProcs; index++) { - if (other_xids[index] == xid) + if (pg_atomic_read_u64(&other_xids[index]) == xid) { int pgprocno = arrayP->pgprocnos[index]; PGPROC *proc = &allProcs[pgprocno]; @@ -3351,7 +3343,7 @@ GetCurrentVirtualXIDs(TransactionId limitXmin, bool excludeXmin0, if (allDbs || proc->databaseId == MyDatabaseId) { /* Fetch xmin just once - might change on us */ - TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin); + TransactionId pxmin = pg_atomic_read_u64(&proc->xmin); if (excludeXmin0 && !TransactionIdIsValid(pxmin)) continue; @@ -3451,7 +3443,7 @@ GetConflictingVirtualXIDs(TransactionId limitXmin, Oid dbOid) proc->databaseId == dbOid) { /* Fetch xmin just once - can't change on us, but good coding */ - TransactionId pxmin = UINT32_ACCESS_ONCE(proc->xmin); + TransactionId pxmin = pg_atomic_read_u64(&proc->xmin); /* * We ignore an invalid pxmin because this 
means that backend has @@ -3578,7 +3570,7 @@ MinimumActiveBackends(int min) continue; /* do not count deleted entries */ if (proc == MyProc) continue; /* do not count myself */ - if (proc->xid == InvalidTransactionId) + if (pg_atomic_read_u64(&proc->xid) == InvalidTransactionId) continue; /* do not count if no XID assigned */ if (proc->pid == 0) continue; /* do not count prepared xacts */ @@ -4168,17 +4160,13 @@ static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons) { GlobalVisSharedRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->shared_oldest_nonremovable); + FullTransactionIdFromXid(horizons->shared_oldest_nonremovable); GlobalVisCatalogRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->catalog_oldest_nonremovable); + FullTransactionIdFromXid(horizons->catalog_oldest_nonremovable); GlobalVisDataRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->data_oldest_nonremovable); + FullTransactionIdFromXid(horizons->data_oldest_nonremovable); GlobalVisTempRels.maybe_needed = - FullXidRelativeTo(horizons->latest_completed, - horizons->temp_oldest_nonremovable); + FullTransactionIdFromXid(horizons->temp_oldest_nonremovable); /* * In longer running transactions it's possible that transactions we @@ -4267,15 +4255,7 @@ GlobalVisTestIsRemovableXid(GlobalVisState *state, TransactionId xid) { FullTransactionId fxid; - /* - * Convert 32 bit argument to FullTransactionId. We can do so safely - * because we know the xid has to, at the very least, be between - * [oldestXid, nextXid), i.e. within 2 billion of xid. To avoid taking a - * lock to determine either, we can just compare with - * state->definitely_needed, which was based on those value at the time - * the current snapshot was built. 
- */ - fxid = FullXidRelativeTo(state->definitely_needed, xid); + fxid = FullTransactionIdFromXid(xid); return GlobalVisTestIsRemovableFullXid(state, fxid); } @@ -4308,32 +4288,6 @@ GlobalVisCheckRemovableXid(Relation rel, TransactionId xid) return GlobalVisTestIsRemovableXid(state, xid); } -/* - * Convert a 32 bit transaction id into 64 bit transaction id, by assuming it - * is within MaxTransactionId / 2 of XidFromFullTransactionId(rel). - * - * Be very careful about when to use this function. It can only safely be used - * when there is a guarantee that xid is within MaxTransactionId / 2 xids of - * rel. That e.g. can be guaranteed if the caller assures a snapshot is - * held by the backend and xid is from a table (where vacuum/freezing ensures - * the xid has to be within that range), or if xid is from the procarray and - * prevents xid wraparound that way. - */ -static inline FullTransactionId -FullXidRelativeTo(FullTransactionId rel, TransactionId xid) -{ - TransactionId rel_xid = XidFromFullTransactionId(rel); - - Assert(TransactionIdIsValid(xid)); - Assert(TransactionIdIsValid(rel_xid)); - - /* not guaranteed to find issues, but likely to catch mistakes */ - AssertTransactionIdInAllowableRange(xid); - - return FullTransactionIdFromU64(U64FromFullTransactionId(rel) - + (int32) (xid - rel_xid)); -} - /* ---------------------------------------------- * KnownAssignedTransactionIds sub-module diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index a8c67f2995c..60a68446fa4 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -521,8 +521,8 @@ ResolveRecoveryConflictWithSnapshotFullXid(FullTransactionId snapshotConflictHor FullTransactionId nextXid = ReadNextFullTransactionId(); uint64 diff; - diff = U64FromFullTransactionId(nextXid) - - U64FromFullTransactionId(snapshotConflictHorizon); + diff = XidFromFullTransactionId(nextXid) - + XidFromFullTransactionId(snapshotConflictHorizon); if (diff < 
MaxTransactionId / 2) { TransactionId truncated; diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index f50962983c3..73f857671a0 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -1277,10 +1277,16 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag) tag->locktag_field1); break; case LOCKTAG_TRANSACTION: - appendStringInfo(buf, - _("transaction %u"), - tag->locktag_field1); - break; + { + TransactionId xid; + + xid = (TransactionId) tag->locktag_field2 << 32; + xid += tag->locktag_field1; + + appendStringInfo(buf, _("transaction %llu"), + (unsigned long long) xid); + break; + } case LOCKTAG_VIRTUALTRANSACTION: appendStringInfo(buf, _("virtual transaction %d/%u"), diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 38a5d75bbe1..086e52bae0d 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -4171,7 +4171,7 @@ GetRunningTransactionLocks(int *nlocks) { PGPROC *proc = proclock->tag.myProc; LOCK *lock = proclock->tag.myLock; - TransactionId xid = proc->xid; + TransactionId xid = pg_atomic_read_u64(&proc->xid); /* * Don't record locks for transactions if we know they have @@ -4791,7 +4791,7 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait) * so we won't save an XID of a different VXID. It doesn't matter whether * we save this before or after setting up the primary lock table entry. 
*/ - xid = proc->xid; + xid = pg_atomic_read_u64(&proc->xid); /* Done with proc->fpLockBits */ LWLockRelease(&proc->fpInfoLock); diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index 02a425c0bc6..a59e6d46cd0 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -338,9 +338,9 @@ static SlruCtlData SerialSlruCtlData; #define SerialValue(slotno, xid) (*((SerCommitSeqNo *) \ (SerialSlruCtl->shared->page_buffer[slotno] + \ - ((((uint32) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE)))) + ((((uint64) (xid)) % SERIAL_ENTRIESPERPAGE) * SERIAL_ENTRYSIZE)))) -#define SerialPage(xid) (((uint32) (xid)) / SERIAL_ENTRIESPERPAGE) +#define SerialPage(xid) ((int64) (((uint64) (xid)) / SERIAL_ENTRIESPERPAGE)) typedef struct SerialControlData { @@ -1077,31 +1077,6 @@ CheckPointPredicate(void) /*---------- * The SLRU is no longer needed. Truncate to head before we set head * invalid. - * - * XXX: It's possible that the SLRU is not needed again until XID - * wrap-around has happened, so that the segment containing headPage - * that we leave behind will appear to be new again. In that case it - * won't be removed until XID horizon advances enough to make it - * current again. - * - * XXX: This should happen in vac_truncate_clog(), not in checkpoints. - * Consider this scenario, starting from a system with no in-progress - * transactions and VACUUM FREEZE having maximized oldestXact: - * - Start a SERIALIZABLE transaction. - * - Start, finish, and summarize a SERIALIZABLE transaction, creating - * one SLRU page. - * - Consume XIDs to reach xidStopLimit. - * - Finish all transactions. Due to the long-running SERIALIZABLE - * transaction, earlier checkpoints did not touch headPage. The - * next checkpoint will change it, but that checkpoint happens after - * the end of the scenario. - * - VACUUM to advance XID limits. - * - Consume ~2M XIDs, crossing the former xidWrapLimit. 
- * - Start, finish, and summarize a SERIALIZABLE transaction. - * SerialAdd() declines to create the targetPage, because headPage - * is not regarded as in the past relative to that targetPage. The - * transaction instigating the summarize fails in - * SimpleLruReadPage(). */ truncateCutoffPage = serialControl->headPage; serialControl->headPage = -1; @@ -3974,7 +3949,7 @@ XidIsConcurrent(TransactionId xid) if (TransactionIdFollowsOrEquals(xid, snap->xmax)) return true; - return pg_lfind32(xid, snap->xip, snap->xcnt); + return pg_lfind64(xid, snap->xip, snap->xcnt); } bool diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index e4ca861a8e6..0dfd932f6b8 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -218,7 +218,7 @@ InitProcGlobal(void) * how hotly they are accessed. */ ProcGlobal->xids = - (TransactionId *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->xids)); + (pg_atomic_uint64 *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->xids)); MemSet(ProcGlobal->xids, 0, TotalProcs * sizeof(*ProcGlobal->xids)); ProcGlobal->subxidStates = (XidCacheStatus *) ShmemAlloc(TotalProcs * sizeof(*ProcGlobal->subxidStates)); MemSet(ProcGlobal->subxidStates, 0, TotalProcs * sizeof(*ProcGlobal->subxidStates)); @@ -245,6 +245,8 @@ InitProcGlobal(void) /* Common initialization for all PGPROCs, regardless of type. */ + pg_atomic_init_u64(&ProcGlobal->xids[i], 0); + /* * Set the fast-path lock arrays, and move the pointer. We interleave * the two arrays, to (hopefully) get some locality for each backend. 
@@ -423,8 +425,8 @@ InitProcess(void) MyProc->waitStatus = PROC_WAIT_STATUS_OK; MyProc->fpVXIDLock = false; MyProc->fpLocalTransactionId = InvalidLocalTransactionId; - MyProc->xid = InvalidTransactionId; - MyProc->xmin = InvalidTransactionId; + pg_atomic_init_u64(&MyProc->xid, InvalidTransactionId); + pg_atomic_init_u64(&MyProc->xmin, InvalidTransactionId); MyProc->pid = MyProcPid; MyProc->vxid.procNumber = MyProcNumber; MyProc->vxid.lxid = InvalidLocalTransactionId; @@ -624,8 +626,8 @@ InitAuxiliaryProcess(void) MyProc->waitStatus = PROC_WAIT_STATUS_OK; MyProc->fpVXIDLock = false; MyProc->fpLocalTransactionId = InvalidLocalTransactionId; - MyProc->xid = InvalidTransactionId; - MyProc->xmin = InvalidTransactionId; + pg_atomic_init_u64(&MyProc->xid, InvalidTransactionId); + pg_atomic_init_u64(&MyProc->xmin, InvalidTransactionId); MyProc->vxid.procNumber = INVALID_PROC_NUMBER; MyProc->vxid.lxid = InvalidLocalTransactionId; MyProc->databaseId = InvalidOid; diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index ecc81aacfc3..bbb8302301a 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -21,11 +21,31 @@ #include "storage/checksum.h" #include "utils/memdebug.h" #include "utils/memutils.h" +#include "utils/snapmgr.h" /* GUC variable */ bool ignore_checksum_failure = false; +/* + * HeapPageSpecialData used when pd_special == BLCKSZ. This is special format + * used when page with 32-bit xids doesn't fit HeapPageSpecialData. Then + * all xmin's are frozen (can do this for all live tuples after pg_upgrade), + * while 64-bit xmax is stored in both t_heap.t_xmin and t_heap.t_xmax. + * This is so-called "double xmax" format. 
+ */ +static HeapPageSpecialData heapDoubleXmaxSpecialData = +{ + .pd_xid_base = MaxTransactionId, + .pd_multi_base = MaxTransactionId +}; +HeapPageSpecial heapDoubleXmaxSpecial = &heapDoubleXmaxSpecialData; + +static ToastPageSpecialData toastDoubleXmaxSpecialData = +{ + .pd_xid_base = MaxTransactionId +}; +ToastPageSpecial toastDoubleXmaxSpecial = &toastDoubleXmaxSpecialData; /* ---------------------------------------------------------------- * Page support functions @@ -421,15 +441,144 @@ PageRestoreTempPage(Page tempPage, Page oldPage) } /* - * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete + * Get minimum and maximum values of xid and multixact on "double xmax" page. */ -typedef struct itemIdCompactData +static void +heap_page_double_xmax_get_min_max(Page page, + TransactionId *xid_min, + TransactionId *xid_max, + MultiXactId *multi_min, + MultiXactId *multi_max) { - uint16 offsetindex; /* linp array index */ - int16 itemoff; /* page offset of item data */ - uint16 alignedlen; /* MAXALIGN(item data len) */ -} itemIdCompactData; -typedef itemIdCompactData *itemIdCompact; + bool xid_found = false, + multi_found = false; + OffsetNumber offnum, + maxoff; + + maxoff = PageGetMaxOffsetNumber(page); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + HeapTupleHeader htup; + TransactionId xmax; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsNormal(itemid)) + continue; + + htup = (HeapTupleHeader) PageGetItem(page, itemid); + + xmax = HeapTupleHeaderGetDoubleXmax(htup); + + if (!TransactionIdIsNormal(xmax)) + continue; + + if (!(htup->t_infomask & HEAP_XMAX_IS_MULTI)) + { + if (!xid_found) + { + *xid_min = *xid_max = xmax; + xid_found = true; + } + else + { + *xid_min = Min(*xid_min, xmax); + *xid_max = Max(*xid_max, xmax); + } + } + else + { + if (!multi_found) + { + *multi_min = *multi_max = xmax; + multi_found = true; + } + else + { + *multi_min = Min(*multi_min, 
xmax); + *multi_max = Max(*multi_max, xmax); + } + } + } +} + +/* + * Add special area to heap page, so convert from "double xmax" to normal + * format. + */ +static void +heap_page_add_special_area(ItemIdCompact itemidbase, int nitems, Page page, + TransactionId xid_base, MultiXactId multi_base, + bool is_toast) +{ + char newPage[BLCKSZ]; + PageHeader phdr = (PageHeader) page; + PageHeader new_phdr = (PageHeader) newPage; + Offset upper; + int i; + + memcpy(newPage, page, phdr->pd_lower); + + /* Add special area */ + if (is_toast) + { + ToastPageSpecial special; + + new_phdr->pd_special = PageGetPageSize(newPage) - sizeof(ToastPageSpecialData); + special = (ToastPageSpecial) ((Pointer) (newPage) + new_phdr->pd_special); + special->pd_xid_base = xid_base; + } + else + { + HeapPageSpecial special; + + new_phdr->pd_special = PageGetPageSize(newPage) - sizeof(HeapPageSpecialData); + special = (HeapPageSpecial) ((Pointer) (newPage) + new_phdr->pd_special); + special->pd_xid_base = xid_base; + special->pd_multi_base = multi_base; + } + + /* sort itemIdSortData array into decreasing itemoff order */ + qsort((char *) itemidbase, nitems, sizeof(ItemIdCompactData), + itemoffcompare); + + upper = new_phdr->pd_special; + for (i = 0; i < nitems; i++) + { + ItemIdCompact itemidptr = &itemidbase[i]; + ItemId lp; + HeapTupleHeader old_htup; + HeapTupleHeader new_htup; + TransactionId xmax; + + lp = PageGetItemId(page, itemidptr->offsetindex + 1); + old_htup = (HeapTupleHeader) PageGetItem(page, lp); + upper -= itemidptr->alignedlen; + memcpy((Pointer) newPage + upper, + (Pointer) page + itemidptr->itemoff, + itemidptr->alignedlen); + lp = PageGetItemId(newPage, itemidptr->offsetindex + 1); + lp->lp_off = upper; + new_htup = (HeapTupleHeader) PageGetItem(newPage, lp); + + /* Convert xmax value */ + new_htup->t_choice.t_heap.t_xmin = FrozenTransactionId; + xmax = HeapTupleHeaderGetDoubleXmax(old_htup); + if (!(new_htup->t_infomask & HEAP_XMAX_IS_MULTI)) + 
new_htup->t_choice.t_heap.t_xmax = NormalTransactionIdToShort(xid_base, xmax); + else + new_htup->t_choice.t_heap.t_xmax = NormalTransactionIdToShort(multi_base, xmax); + } + + new_phdr->pd_upper = upper; + + memcpy(page, newPage, PageGetPageSize(newPage)); + elog(DEBUG2, "convert heap page from double xmax to normal format"); +} /* * After removing or marking some line pointers unused, move the tuples to @@ -460,21 +609,47 @@ typedef itemIdCompactData *itemIdCompact; * Callers must ensure that nitems is > 0 */ static void -compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorted) +compactify_tuples(ItemIdCompact itemidbase, int nitems, Page page, + bool presorted, bool addspecial, bool is_toast) { PageHeader phdr = (PageHeader) page; Offset upper; Offset copy_tail; Offset copy_head; - itemIdCompact itemidptr; + ItemIdCompact itemidptr; int i; /* Code within will not work correctly if nitems == 0 */ Assert(nitems > 0); - if (presorted) + /* Add special area to the heap page if possible */ + if (addspecial) { + TransactionId xid_min = FirstNormalTransactionId, + xid_max = FirstNormalTransactionId; + MultiXactId multi_min = FirstNormalTransactionId, + multi_max = FirstNormalTransactionId; + Assert(phdr->pd_special == PageGetPageSize(page)); + + heap_page_double_xmax_get_min_max(page, &xid_min, &xid_max, + &multi_min, &multi_max); + + if (xid_max - xid_min < (TransactionId) (MaxShortTransactionId - FirstNormalTransactionId) && + multi_max - multi_min < (TransactionId) (MaxShortTransactionId - FirstNormalTransactionId)) + { + Assert(xid_min >= FirstNormalTransactionId); + Assert(multi_min >= FirstNormalTransactionId); + heap_page_add_special_area(itemidbase, nitems, page, + xid_min - FirstNormalTransactionId, + multi_min - FirstNormalTransactionId, + is_toast); + return; + } + } + + if (presorted) + { #ifdef USE_ASSERT_CHECKING { /* @@ -685,14 +860,14 @@ compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorte * the line 
pointer array following array truncation. */ void -PageRepairFragmentation(Page page) +PageRepairFragmentation(Page page, bool is_toast) { Offset pd_lower = ((PageHeader) page)->pd_lower; Offset pd_upper = ((PageHeader) page)->pd_upper; Offset pd_special = ((PageHeader) page)->pd_special; Offset last_offset; - itemIdCompactData itemidbase[MaxHeapTuplesPerPage]; - itemIdCompact itemidptr; + ItemIdCompactData itemidbase[MaxHeapTuplesPerPage]; + ItemIdCompact itemidptr; ItemId lp; int nline, nstorage, @@ -766,11 +941,30 @@ PageRepairFragmentation(Page page) nstorage = itemidptr - itemidbase; if (nstorage == 0) { + if (pd_special == PageGetPageSize(page)) + { + if (is_toast) + { + pd_special = PageGetPageSize(page) - sizeof(ToastPageSpecialData); + ((PageHeader) page)->pd_special = pd_special; + ToastPageGetSpecial(page)->pd_xid_base = 0; + } + else + { + pd_special = PageGetPageSize(page) - sizeof(HeapPageSpecialData); + ((PageHeader) page)->pd_special = pd_special; + HeapPageGetSpecial(page)->pd_xid_base = 0; + HeapPageGetSpecial(page)->pd_multi_base = 0; + } + } + /* Page is completely empty, so just reset it quickly */ ((PageHeader) page)->pd_upper = pd_special; } else { + bool addspecial = false; + /* Need to compact the page the hard way */ if (totallen > (Size) (pd_special - pd_lower)) ereport(ERROR, @@ -778,7 +972,25 @@ PageRepairFragmentation(Page page) errmsg("corrupted item lengths: total %u, available space %u", (unsigned int) totallen, pd_special - pd_lower))); - compactify_tuples(itemidbase, nstorage, page, presorted); + /* + * Try to add special area to the heap page if it has enough of free + * space. + */ + if (pd_special == PageGetPageSize(page)) + { + Size special_size, + actual_size; + + special_size = is_toast ? 
sizeof(ToastPageSpecialData) : + sizeof(HeapPageSpecialData); + actual_size = (Size) (pd_special - pd_lower) - totallen; + + if (actual_size >= special_size) + addspecial = true; + } + + compactify_tuples(itemidbase, nstorage, page, presorted, addspecial, + is_toast); } if (finalusedlp != nline) @@ -981,6 +1193,9 @@ PageGetHeapFreeSpace(const PageData *page) { Size space; + if (HeapPageIsDoubleXmax(page)) + return 0; + space = PageGetFreeSpace(page); if (space > 0) { @@ -1154,9 +1369,9 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems) Offset pd_upper = phdr->pd_upper; Offset pd_special = phdr->pd_special; Offset last_offset; - itemIdCompactData itemidbase[MaxIndexTuplesPerPage]; + ItemIdCompactData itemidbase[MaxIndexTuplesPerPage]; ItemIdData newitemids[MaxIndexTuplesPerPage]; - itemIdCompact itemidptr; + ItemIdCompact itemidptr; ItemId lp; int nline, nused; @@ -1264,7 +1479,12 @@ PageIndexMultiDelete(Page page, OffsetNumber *itemnos, int nitems) /* and compactify the tuple data */ if (nused > 0) - compactify_tuples(itemidbase, nused, page, presorted); + { + bool is_toast; + + is_toast = BLCKSZ - pd_special == sizeof(ToastPageSpecialData); + compactify_tuples(itemidbase, nused, page, presorted, false, is_toast); + } else phdr->pd_upper = pd_special; } diff --git a/src/backend/utils/adt/enum.c b/src/backend/utils/adt/enum.c index fcc6981632b..cc59f8cfaa2 100644 --- a/src/backend/utils/adt/enum.c +++ b/src/backend/utils/adt/enum.c @@ -76,7 +76,7 @@ check_safe_enum_use(HeapTuple enumval_tup) * Usually, a row would get hinted as committed when it's read or loaded * into syscache; but just in case not, let's check the xmin directly. 
*/ - xmin = HeapTupleHeaderGetXmin(enumval_tup->t_data); + xmin = HeapTupleGetXmin(enumval_tup); if (!TransactionIdIsInProgress(xmin) && TransactionIdDidCommit(xmin)) return; diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index 9f43b58dba5..3803b19e347 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -3566,6 +3566,7 @@ populate_record(TupleDesc tupdesc, tuple.t_len = HeapTupleHeaderGetDatumLength(defaultval); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple); tuple.t_data = defaultval; /* Break down the tuple into fields */ @@ -4028,6 +4029,7 @@ populate_recordset_record(PopulateRecordsetState *state, JsObject *obj) tuple.t_len = HeapTupleHeaderGetDatumLength(tuphead); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple); tuple.t_data = tuphead; tuplestore_puttuple(state->tuple_store, &tuple); diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index e15b13da078..f151df49abd 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -78,7 +78,7 @@ VXIDGetDatum(ProcNumber procNumber, LocalTransactionId lxid) * decimal respectively. Note that elog.c also knows how to format a * vxid. 
*/ - char vxidstr[32]; + char vxidstr[64]; snprintf(vxidstr, sizeof(vxidstr), "%d/%llu", procNumber, (unsigned long long) lxid); @@ -291,7 +291,9 @@ pg_lock_status(PG_FUNCTION_ARGS) break; case LOCKTAG_TRANSACTION: values[6] = - TransactionIdGetDatum(instance->locktag.locktag_field1); + TransactionIdGetDatum( + (TransactionId) instance->locktag.locktag_field1 | + ((TransactionId) instance->locktag.locktag_field2 << 32)); nulls[1] = true; nulls[2] = true; nulls[3] = true; @@ -303,7 +305,8 @@ pg_lock_status(PG_FUNCTION_ARGS) break; case LOCKTAG_VIRTUALTRANSACTION: values[5] = VXIDGetDatum(instance->locktag.locktag_field1, - instance->locktag.locktag_field2); + (TransactionId) instance->locktag.locktag_field2 | + ((TransactionId) instance->locktag.locktag_field3 << 32)); nulls[1] = true; nulls[2] = true; nulls[3] = true; diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 97af7c6554f..910b4d71e36 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/xact.h" #include "access/xlog.h" #include "access/xlogprefetcher.h" #include "catalog/catalog.h" diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c index fe5edc0027d..863f326c122 100644 --- a/src/backend/utils/adt/rowtypes.c +++ b/src/backend/utils/adt/rowtypes.c @@ -353,6 +353,7 @@ record_out(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple); tuple.t_data = rec; /* @@ -711,6 +712,7 @@ record_send(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple); tuple.t_data = rec; /* @@ -861,10 +863,12 @@ record_cmp(FunctionCallInfo fcinfo) tuple1.t_len = HeapTupleHeaderGetDatumLength(record1); 
ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple1); tuple1.t_data = record1; tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple2); tuple2.t_data = record2; /* @@ -1106,10 +1110,12 @@ record_eq(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + HeapTupleSetZeroXids(&tuple1); tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroXids(&tuple2); /* * We arrange to look up the needed comparison info just once per series @@ -1386,10 +1392,12 @@ record_image_cmp(FunctionCallInfo fcinfo) ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + HeapTupleSetZeroXids(&tuple1); tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroXids(&tuple2); /* * We arrange to look up the needed comparison info just once per series @@ -1632,10 +1640,12 @@ record_image_eq(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple1.t_self)); tuple1.t_tableOid = InvalidOid; tuple1.t_data = record1; + HeapTupleSetZeroXids(&tuple1); tuple2.t_len = HeapTupleHeaderGetDatumLength(record2); ItemPointerSetInvalid(&(tuple2.t_self)); tuple2.t_tableOid = InvalidOid; tuple2.t_data = record2; + HeapTupleSetZeroXids(&tuple2); /* * We arrange to look up the needed comparison info just once per series @@ -1835,6 +1845,7 @@ hash_record(PG_FUNCTION_ARGS) ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = record; + HeapTupleSetZeroXids(&tuple); /* * We arrange to look up the needed hashing info just once per series of @@ -1956,6 +1967,7 @@ hash_record_extended(PG_FUNCTION_ARGS) 
ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; tuple.t_data = record; + HeapTupleSetZeroXids(&tuple); /* * We arrange to look up the needed hashing info just once per series of diff --git a/src/backend/utils/adt/xid.c b/src/backend/utils/adt/xid.c index 3d0c48769cc..cf591570377 100644 --- a/src/backend/utils/adt/xid.c +++ b/src/backend/utils/adt/xid.c @@ -35,7 +35,7 @@ xidin(PG_FUNCTION_ARGS) char *str = PG_GETARG_CSTRING(0); TransactionId result; - result = uint32in_subr(str, NULL, "xid", fcinfo->context); + result = uint64in_subr(str, NULL, "xid", fcinfo->context); PG_RETURN_TRANSACTIONID(result); } @@ -43,9 +43,9 @@ Datum xidout(PG_FUNCTION_ARGS) { TransactionId transactionId = PG_GETARG_TRANSACTIONID(0); - char *result = (char *) palloc(16); + char *result = (char *) palloc(32); - snprintf(result, 16, "%lu", (unsigned long) transactionId); + snprintf(result, 32, "%llu", (unsigned long long) transactionId); PG_RETURN_CSTRING(result); } @@ -56,8 +56,13 @@ Datum xidrecv(PG_FUNCTION_ARGS) { StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + uint32 lo, + hi; + + lo = (uint32) pq_getmsgint(buf, sizeof(TransactionId)); + hi = (uint32) pq_getmsgint(buf, sizeof(TransactionId)); - PG_RETURN_TRANSACTIONID((TransactionId) pq_getmsgint(buf, sizeof(TransactionId))); + PG_RETURN_TRANSACTIONID((uint64) lo + ((uint64) hi << 32)); } /* @@ -68,9 +73,15 @@ xidsend(PG_FUNCTION_ARGS) { TransactionId arg1 = PG_GETARG_TRANSACTIONID(0); StringInfoData buf; + uint32 lo, + hi; + + lo = (uint32) (arg1 & 0xFFFFFFFF); + hi = (uint32) (arg1 >> 32); pq_begintypsend(&buf); - pq_sendint32(&buf, arg1); + pq_sendint(&buf, lo, sizeof(lo)); + pq_sendint(&buf, hi, sizeof(hi)); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } @@ -121,9 +132,9 @@ xid_age(PG_FUNCTION_ARGS) /* Permanent XIDs are always infinitely old */ if (!TransactionIdIsNormal(xid)) - PG_RETURN_INT32(INT_MAX); + PG_RETURN_INT64(PG_INT64_MAX); - PG_RETURN_INT32((int32) (now - 
xid)); } /* @@ -136,9 +147,9 @@ mxid_age(PG_FUNCTION_ARGS) MultiXactId now = ReadNextMultiXactId(); if (!MultiXactIdIsValid(xid)) - PG_RETURN_INT32(INT_MAX); + PG_RETURN_INT64(PG_INT64_MAX); - PG_RETURN_INT32((int32) (now - xid)); + PG_RETURN_INT64((int64) (now - xid)); } /* @@ -198,7 +209,7 @@ xid8in(PG_FUNCTION_ARGS) uint64 result; result = uint64in_subr(str, NULL, "xid8", fcinfo->context); - PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(result)); + PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromXid(result)); } Datum @@ -207,7 +218,7 @@ xid8out(PG_FUNCTION_ARGS) FullTransactionId fxid = PG_GETARG_FULLTRANSACTIONID(0); char *result = (char *) palloc(21); - snprintf(result, 21, UINT64_FORMAT, U64FromFullTransactionId(fxid)); + snprintf(result, 21, UINT64_FORMAT, XidFromFullTransactionId(fxid)); PG_RETURN_CSTRING(result); } @@ -218,7 +229,7 @@ xid8recv(PG_FUNCTION_ARGS) uint64 value; value = (uint64) pq_getmsgint64(buf); - PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(value)); + PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromXid(value)); } Datum @@ -228,7 +239,7 @@ xid8send(PG_FUNCTION_ARGS) StringInfoData buf; pq_begintypsend(&buf); - pq_sendint64(&buf, (uint64) U64FromFullTransactionId(arg1)); + pq_sendint64(&buf, (uint64) XidFromFullTransactionId(arg1)); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c index 88d798fbf4b..e2539cb0bf2 100644 --- a/src/backend/utils/adt/xid8funcs.c +++ b/src/backend/utils/adt/xid8funcs.c @@ -86,8 +86,7 @@ StaticAssertDecl(MAX_BACKENDS * 2 <= PG_SNAPSHOT_MAX_NXIP, * It is an ERROR if the xid is in the future. Otherwise, returns true if * the transaction is still new enough that we can determine whether it * committed and false otherwise. If *extracted_xid is not NULL, it is set - * to the low 32 bits of the transaction ID (i.e. the actual XID, without the - * epoch). + * to the actual transaction ID. 
* * The caller must hold XactTruncationLock since it's dealing with arbitrary * XIDs, and must continue to hold it until it's done with any clog lookups @@ -97,11 +96,13 @@ static bool TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid) { TransactionId xid = XidFromFullTransactionId(fxid); + TransactionId next_xid PG_USED_FOR_ASSERTS_ONLY; FullTransactionId now_fullxid; TransactionId oldest_clog_xid; FullTransactionId oldest_clog_fxid; now_fullxid = ReadNextFullTransactionId(); + next_xid = XidFromFullTransactionId(now_fullxid); if (extracted_xid != NULL) *extracted_xid = xid; @@ -118,7 +119,7 @@ TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("transaction ID %llu is in the future", - (unsigned long long) U64FromFullTransactionId(fxid)))); + (unsigned long long) XidFromFullTransactionId(fxid)))); /* * TransamVariables->oldestClogXid is protected by XactTruncationLock, but @@ -141,8 +142,8 @@ TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid) * not that happened, oldestClogXid is allowable relative to now_fullxid. 
*/ oldest_clog_xid = TransamVariables->oldestClogXid; - oldest_clog_fxid = - FullTransactionIdFromAllowableAt(now_fullxid, oldest_clog_xid); + oldest_clog_fxid = FullTransactionIdFromXid(oldest_clog_xid); + Assert(TransactionIdPrecedesOrEquals(oldest_clog_xid, next_xid)); return !FullTransactionIdPrecedes(fxid, oldest_clog_fxid); } @@ -272,12 +273,12 @@ parse_snapshot(const char *str, Node *escontext) char *endp; StringInfo buf; - xmin = FullTransactionIdFromU64(strtou64(str, &endp, 10)); + xmin = FullTransactionIdFromXid(strtou64(str, &endp, 10)); if (*endp != ':') goto bad_format; str = endp + 1; - xmax = FullTransactionIdFromU64(strtou64(str, &endp, 10)); + xmax = FullTransactionIdFromXid(strtou64(str, &endp, 10)); if (*endp != ':') goto bad_format; str = endp + 1; @@ -295,7 +296,7 @@ parse_snapshot(const char *str, Node *escontext) while (*str != '\0') { /* read next value */ - val = FullTransactionIdFromU64(strtou64(str, &endp, 10)); + val = FullTransactionIdFromXid(strtou64(str, &endp, 10)); str = endp; /* require the input to be in order */ @@ -373,7 +374,6 @@ pg_current_snapshot(PG_FUNCTION_ARGS) uint32 nxip, i; Snapshot cur; - FullTransactionId next_fxid = ReadNextFullTransactionId(); cur = GetActiveSnapshot(); if (cur == NULL) @@ -389,12 +389,11 @@ pg_current_snapshot(PG_FUNCTION_ARGS) * advance past any of these XIDs. Hence, these XIDs remain allowable * relative to next_fxid. */ - snap->xmin = FullTransactionIdFromAllowableAt(next_fxid, cur->xmin); - snap->xmax = FullTransactionIdFromAllowableAt(next_fxid, cur->xmax); + snap->xmin = FullTransactionIdFromXid(cur->xmin); + snap->xmax = FullTransactionIdFromXid(cur->xmax); snap->nxip = nxip; for (i = 0; i < nxip; i++) - snap->xip[i] = - FullTransactionIdFromAllowableAt(next_fxid, cur->xip[i]); + snap->xip[i] = FullTransactionIdFromXid(cur->xip[i]); /* * We want them guaranteed to be in ascending order. 
This also removes @@ -442,16 +441,16 @@ pg_snapshot_out(PG_FUNCTION_ARGS) initStringInfo(&str); appendStringInfo(&str, UINT64_FORMAT ":", - U64FromFullTransactionId(snap->xmin)); + XidFromFullTransactionId(snap->xmin)); appendStringInfo(&str, UINT64_FORMAT ":", - U64FromFullTransactionId(snap->xmax)); + XidFromFullTransactionId(snap->xmax)); for (i = 0; i < snap->nxip; i++) { if (i > 0) appendStringInfoChar(&str, ','); appendStringInfo(&str, UINT64_FORMAT, - U64FromFullTransactionId(snap->xip[i])); + XidFromFullTransactionId(snap->xip[i])); } PG_RETURN_CSTRING(str.data); @@ -480,8 +479,8 @@ pg_snapshot_recv(PG_FUNCTION_ARGS) if (nxip < 0 || nxip > PG_SNAPSHOT_MAX_NXIP) goto bad_format; - xmin = FullTransactionIdFromU64((uint64) pq_getmsgint64(buf)); - xmax = FullTransactionIdFromU64((uint64) pq_getmsgint64(buf)); + xmin = FullTransactionIdFromXid((uint64) pq_getmsgint64(buf)); + xmax = FullTransactionIdFromXid((uint64) pq_getmsgint64(buf)); if (!FullTransactionIdIsValid(xmin) || !FullTransactionIdIsValid(xmax) || FullTransactionIdPrecedes(xmax, xmin)) @@ -494,7 +493,7 @@ pg_snapshot_recv(PG_FUNCTION_ARGS) for (i = 0; i < nxip; i++) { FullTransactionId cur = - FullTransactionIdFromU64((uint64) pq_getmsgint64(buf)); + FullTransactionIdFromXid((uint64) pq_getmsgint64(buf)); if (FullTransactionIdPrecedes(cur, last) || FullTransactionIdPrecedes(cur, xmin) || @@ -539,10 +538,10 @@ pg_snapshot_send(PG_FUNCTION_ARGS) pq_begintypsend(&buf); pq_sendint32(&buf, snap->nxip); - pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xmin)); - pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xmax)); + pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xmin)); + pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xmax)); for (i = 0; i < snap->nxip; i++) - pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xip[i])); + pq_sendint64(&buf, (int64) XidFromFullTransactionId(snap->xip[i])); PG_RETURN_BYTEA_P(pq_endtypsend(&buf)); } @@ -630,8 +629,7 @@ 
pg_snapshot_xip(PG_FUNCTION_ARGS) * Report the status of a recent transaction ID, or null for wrapped, * truncated away or otherwise too old XIDs. * - * The passed epoch-qualified xid is treated as a normal xid, not a - * multixact id. + * The passed xid is treated as a normal xid, not a multixact id. * * If it points to a committed subxact the result is the subxact status even * though the parent xact may still be in progress or may have aborted. diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 9ad7681f155..8400c13ed20 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -2191,6 +2191,7 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments, memcpy((char *) ct->tuple.t_data, (const char *) dtp->t_data, dtp->t_len); + HeapTupleCopyXids(&ct->tuple, dtp); MemoryContextSwitchTo(oldcxt); if (dtp != ntp) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 9f54a9e72b7..3011f5571e0 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -2320,8 +2320,7 @@ RelationReloadIndexInfo(Relation relation) relation->rd_index->indisreplident = index->indisreplident; /* Copy xmin too, as that is needed to make sense of indcheckxmin */ - HeapTupleHeaderSetXmin(relation->rd_indextuple->t_data, - HeapTupleHeaderGetXmin(tuple->t_data)); + HeapTupleSetXmin(relation->rd_indextuple, HeapTupleGetXmin(tuple)); ReleaseSysCache(tuple); } diff --git a/src/backend/utils/fmgr/fmgr.c b/src/backend/utils/fmgr/fmgr.c index 782291d9998..fef59966927 100644 --- a/src/backend/utils/fmgr/fmgr.c +++ b/src/backend/utils/fmgr/fmgr.c @@ -526,7 +526,7 @@ lookup_C_func(HeapTuple procedureTuple) NULL); if (entry == NULL) return NULL; /* no such entry */ - if (entry->fn_xmin == HeapTupleHeaderGetRawXmin(procedureTuple->t_data) && + if (entry->fn_xmin == HeapTupleGetRawXmin(procedureTuple) && ItemPointerEquals(&entry->fn_tid, 
&procedureTuple->t_self)) return entry; /* OK */ return NULL; /* entry is out of date */ @@ -562,7 +562,7 @@ record_C_func(HeapTuple procedureTuple, HASH_ENTER, &found); /* OID is already filled in */ - entry->fn_xmin = HeapTupleHeaderGetRawXmin(procedureTuple->t_data); + entry->fn_xmin = HeapTupleGetRawXmin(procedureTuple); entry->fn_tid = procedureTuple->t_self; entry->user_fn = user_fn; entry->inforec = inforec; diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 5d173c83c58..d4d15c0b11f 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -2780,65 +2780,6 @@ struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, - { - {"vacuum_freeze_min_age", PGC_USERSET, VACUUM_FREEZING, - gettext_noop("Minimum age at which VACUUM should freeze a table row."), - NULL - }, - &vacuum_freeze_min_age, - 50000000, 0, 1000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_freeze_table_age", PGC_USERSET, VACUUM_FREEZING, - gettext_noop("Age at which VACUUM should scan whole table to freeze tuples."), - NULL - }, - &vacuum_freeze_table_age, - 150000000, 0, 2000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_multixact_freeze_min_age", PGC_USERSET, VACUUM_FREEZING, - gettext_noop("Minimum age at which VACUUM should freeze a MultiXactId in a table row."), - NULL - }, - &vacuum_multixact_freeze_min_age, - 5000000, 0, 1000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_multixact_freeze_table_age", PGC_USERSET, VACUUM_FREEZING, - gettext_noop("Multixact age at which VACUUM should scan whole table to freeze tuples."), - NULL - }, - &vacuum_multixact_freeze_table_age, - 150000000, 0, 2000000000, - NULL, NULL, NULL - }, - - { - {"vacuum_failsafe_age", PGC_USERSET, VACUUM_FREEZING, - gettext_noop("Age at which VACUUM should trigger failsafe to avoid a wraparound outage."), - NULL - }, - &vacuum_failsafe_age, - 1600000000, 0, 2100000000, - NULL, NULL, NULL - }, - { - {"vacuum_multixact_failsafe_age", 
PGC_USERSET, VACUUM_FREEZING, - gettext_noop("Multixact age at which VACUUM should trigger failsafe to avoid a wraparound outage."), - NULL - }, - &vacuum_multixact_failsafe_age, - 1600000000, 0, 2100000000, - NULL, NULL, NULL - }, - /* * See also CheckRequiredParameterValues() if this parameter changes */ @@ -3525,28 +3466,6 @@ struct config_int ConfigureNamesInt[] = 60, 1, INT_MAX / 1000, NULL, NULL, NULL }, - { - /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ - {"autovacuum_freeze_max_age", PGC_POSTMASTER, VACUUM_AUTOVACUUM, - gettext_noop("Age at which to autovacuum a table to prevent transaction ID wraparound."), - NULL - }, - &autovacuum_freeze_max_age, - - /* see vacuum_failsafe_age if you change the upper-limit value. */ - 200000000, 100000, 2000000000, - NULL, NULL, NULL - }, - { - /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ - {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, VACUUM_AUTOVACUUM, - gettext_noop("Multixact age at which to autovacuum a table to prevent multixact wraparound."), - NULL - }, - &autovacuum_multixact_freeze_max_age, - 400000000, 10000, 2000000000, - NULL, NULL, NULL - }, { /* see max_connections */ {"autovacuum_worker_slots", PGC_POSTMASTER, VACUUM_AUTOVACUUM, @@ -3822,7 +3741,6 @@ struct config_int ConfigureNamesInt[] = SCRAM_SHA_256_DEFAULT_ITERATIONS, 1, INT_MAX, NULL, NULL, NULL }, - /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL @@ -3832,6 +3750,87 @@ struct config_int ConfigureNamesInt[] = struct config_int64 ConfigureNamesInt64[] = { + { + {"vacuum_freeze_min_age", PGC_USERSET, VACUUM_FREEZING, + gettext_noop("Minimum age at which VACUUM should freeze a table row."), + NULL + }, + &vacuum_freeze_min_age, + INT64CONST(50000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_freeze_table_age", PGC_USERSET, VACUUM_FREEZING, + gettext_noop("Age at which VACUUM should scan whole table to freeze 
tuples."), + NULL + }, + &vacuum_freeze_table_age, + INT64CONST(150000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_multixact_freeze_min_age", PGC_USERSET, VACUUM_FREEZING, + gettext_noop("Minimum age at which VACUUM should freeze a MultiXactId in a table row."), + NULL + }, + &vacuum_multixact_freeze_min_age, + INT64CONST(5000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_multixact_freeze_table_age", PGC_USERSET, VACUUM_FREEZING, + gettext_noop("Multixact age at which VACUUM should scan whole table to freeze tuples."), + NULL + }, + &vacuum_multixact_freeze_table_age, + INT64CONST(150000000), INT64CONST(0), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + + { + {"vacuum_failsafe_age", PGC_USERSET, VACUUM_FREEZING, + gettext_noop("Age at which VACUUM should trigger failsafe to avoid a wraparound outage."), + NULL + }, + &vacuum_failsafe_age, + INT64CONST(1600000000), INT64CONST(0), INT64CONST(2100000000), + NULL, NULL, NULL + }, + { + {"vacuum_multixact_failsafe_age", PGC_USERSET, VACUUM_FREEZING, + gettext_noop("Multixact age at which VACUUM should trigger failsafe to avoid a wraparound outage."), + NULL + }, + &vacuum_multixact_failsafe_age, + INT64CONST(1600000000), INT64CONST(0), INT64CONST(2100000000), + NULL, NULL, NULL + }, + { + /* see varsup.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ + {"autovacuum_freeze_max_age", PGC_POSTMASTER, VACUUM_AUTOVACUUM, + gettext_noop("Age at which to autovacuum a table to prevent transaction ID wraparound."), + NULL + }, + &autovacuum_freeze_max_age, + + /* see vacuum_failsafe_age if you change the upper-limit value. 
*/ + INT64CONST(10000000000), INT64CONST(100000), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + { + /* see multixact.c for why this is PGC_POSTMASTER not PGC_SIGHUP */ + {"autovacuum_multixact_freeze_max_age", PGC_POSTMASTER, VACUUM_AUTOVACUUM, + gettext_noop("Multixact age at which to autovacuum a table to prevent multixact wraparound."), + NULL + }, + &autovacuum_multixact_freeze_max_age, + INT64CONST(20000000000), INT64CONST(10000), INT64CONST(0x7FFFFFFFFFFFFFFF), + NULL, NULL, NULL + }, + { {"autovacuum_vacuum_threshold", PGC_SIGHUP, VACUUM_AUTOVACUUM, gettext_noop("Minimum number of tuple updates or deletes prior to vacuum."), diff --git a/src/backend/utils/misc/help_config.c b/src/backend/utils/misc/help_config.c index 55c36ddf051..88daad32c1c 100644 --- a/src/backend/utils/misc/help_config.c +++ b/src/backend/utils/misc/help_config.c @@ -33,6 +33,7 @@ typedef union struct config_bool _bool; struct config_real real; struct config_int integer; + struct config_int64 integer8; struct config_string string; struct config_enum _enum; } mixedStruct; @@ -106,7 +107,12 @@ printMixedStruct(mixedStruct *structToPrint) structToPrint->integer.min, structToPrint->integer.max); break; - + case PGC_INT64: + printf("INT64\t%lld\t%lld\t%lld\t", + (long long) structToPrint->integer8.reset_val, + (long long) structToPrint->integer8.min, + (long long) structToPrint->integer8.max); + break; case PGC_REAL: printf("REAL\t%g\t%g\t%g\t", structToPrint->real.reset_val, diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c index b4bccf6a344..a7803cfaeff 100644 --- a/src/backend/utils/misc/pg_controldata.c +++ b/src/backend/utils/misc/pg_controldata.c @@ -117,7 +117,7 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) nulls[5] = false; values[6] = CStringGetTextDatum(psprintf("%llu", - (unsigned long long) U64FromFullTransactionId(ControlFile->checkPointCopy.nextXid))); + (unsigned long long) 
XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid))); nulls[6] = false; values[7] = ObjectIdGetDatum(ControlFile->checkPointCopy.nextOid); diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 0b9e3066bde..c13034b56d6 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -696,7 +696,7 @@ autovacuum_worker_slots = 16 # autovacuum worker slots to allocate #autovacuum_vacuum_max_threshold = 100000000 # max number of row updates # before vacuum; -1 disables max # threshold -#autovacuum_freeze_max_age = 200000000 # maximum XID age before forced vacuum +#autovacuum_freeze_max_age = 10000000000 # maximum XID age before forced vacuum # (change requires restart) #autovacuum_multixact_freeze_max_age = 400000000 # maximum multixact age # before forced vacuum diff --git a/src/backend/utils/sort/tuplesortvariants.c b/src/backend/utils/sort/tuplesortvariants.c index 471d1197060..ce322b7d516 100644 --- a/src/backend/utils/sort/tuplesortvariants.c +++ b/src/backend/utils/sort/tuplesortvariants.c @@ -1475,11 +1475,16 @@ writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) { TuplesortPublic *base = TuplesortstateGetPublic(state); HeapTuple tuple = (HeapTuple) stup->tuple; - unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int); + unsigned int tuplen = tuple->t_len + + sizeof(ItemPointerData) + + 2 * sizeof(TransactionId) + /* tuple xmin, xmax */ + sizeof(int); /* We need to store t_self, but not other fields of HeapTupleData */ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeWrite(tape, &tuple->t_xmin, sizeof(TransactionId)); + LogicalTapeWrite(tape, &tuple->t_xmax, sizeof(TransactionId)); LogicalTapeWrite(tape, tuple->t_data, tuple->t_len); if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? 
*/ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); @@ -1491,7 +1496,10 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup, { TuplesortPublic *base = TuplesortstateGetPublic(state); TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg; - unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int); + unsigned int t_len = tuplen - + sizeof(ItemPointerData) - + 2 * sizeof(TransactionId) - /* tuple xmin, xmax */ + sizeof(int); HeapTuple tuple = (HeapTuple) tuplesort_readtup_alloc(state, t_len + HEAPTUPLESIZE); @@ -1499,6 +1507,8 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup, tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); tuple->t_len = t_len; LogicalTapeReadExact(tape, &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeReadExact(tape, &tuple->t_xmin, sizeof(TransactionId)); + LogicalTapeReadExact(tape, &tuple->t_xmax, sizeof(TransactionId)); /* We don't currently bother to reconstruct t_tableOid */ tuple->t_tableOid = InvalidOid; /* Read in the tuple body */ diff --git a/src/backend/utils/time/combocid.c b/src/backend/utils/time/combocid.c index 1e815571570..9f52ff0b919 100644 --- a/src/backend/utils/time/combocid.c +++ b/src/backend/utils/time/combocid.c @@ -101,12 +101,13 @@ static CommandId GetRealCmax(CommandId combocid); */ CommandId -HeapTupleHeaderGetCmin(const HeapTupleHeaderData *tup) +HeapTupleGetCmin(const HeapTupleData *tuple) { + HeapTupleHeader tup = tuple->t_data; CommandId cid = HeapTupleHeaderGetRawCommandId(tup); Assert(!(tup->t_infomask & HEAP_MOVED)); - Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tup))); + Assert(TransactionIdIsCurrentTransactionId(HeapTupleGetXmin(tuple))); if (tup->t_infomask & HEAP_COMBOCID) return GetRealCmin(cid); @@ -115,8 +116,9 @@ HeapTupleHeaderGetCmin(const HeapTupleHeaderData *tup) } CommandId -HeapTupleHeaderGetCmax(const HeapTupleHeaderData *tup) +HeapTupleGetCmax(const HeapTupleData *tuple) { + HeapTupleHeader tup = tuple->t_data; 
CommandId cid = HeapTupleHeaderGetRawCommandId(tup); Assert(!(tup->t_infomask & HEAP_MOVED)); @@ -128,7 +130,7 @@ HeapTupleHeaderGetCmax(const HeapTupleHeaderData *tup) * things too much. */ Assert(CritSectionCount > 0 || - TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetUpdateXid(tup))); + TransactionIdIsCurrentTransactionId(HeapTupleGetUpdateXidAny(tuple))); if (tup->t_infomask & HEAP_COMBOCID) return GetRealCmax(cid); @@ -150,9 +152,7 @@ HeapTupleHeaderGetCmax(const HeapTupleHeaderData *tup) * changes the tuple in shared buffers. */ void -HeapTupleHeaderAdjustCmax(const HeapTupleHeaderData *tup, - CommandId *cmax, - bool *iscombo) +HeapTupleAdjustCmax(const HeapTupleData *tup, CommandId *cmax, bool *iscombo) { /* * If we're marking a tuple deleted that was inserted by (any @@ -160,10 +160,10 @@ HeapTupleHeaderAdjustCmax(const HeapTupleHeaderData *tup, * Test for HeapTupleHeaderXminCommitted() first, because it's cheaper * than a TransactionIdIsCurrentTransactionId call. */ - if (!HeapTupleHeaderXminCommitted(tup) && - TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tup))) + if (!HeapTupleHeaderXminCommitted(tup->t_data) && + TransactionIdIsCurrentTransactionId(HeapTupleGetRawXmin(tup))) { - CommandId cmin = HeapTupleHeaderGetCmin(tup); + CommandId cmin = HeapTupleGetCmin(tup); *cmax = GetComboCommandId(cmin, *cmax); *iscombo = true; diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index cf021c2ecb4..ce6efa1d971 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -930,15 +930,17 @@ SnapshotResetXmin(void) if (pairingheap_is_empty(&RegisteredSnapshots)) { - MyProc->xmin = TransactionXmin = InvalidTransactionId; + pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = InvalidTransactionId); return; } minSnapshot = pairingheap_container(SnapshotData, ph_node, pairingheap_first(&RegisteredSnapshots)); - if (TransactionIdPrecedes(MyProc->xmin, minSnapshot->xmin)) - MyProc->xmin = 
TransactionXmin = minSnapshot->xmin; + if (TransactionIdPrecedes(pg_atomic_read_u64(&MyProc->xmin), minSnapshot->xmin)) + { + pg_atomic_write_u64(&MyProc->xmin, TransactionXmin = minSnapshot->xmin); + } } /* @@ -1088,7 +1090,7 @@ AtEOXact_Snapshot(bool isCommit, bool resetXmin) if (resetXmin) SnapshotResetXmin(); - Assert(resetXmin || MyProc->xmin == 0); + Assert(resetXmin || pg_atomic_read_u64(&MyProc->xmin) == 0); } @@ -1153,9 +1155,9 @@ ExportSnapshot(Snapshot snapshot) * Generate file path for the snapshot. We start numbering of snapshots * inside the transaction from 1. */ - snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X-%d", - MyProc->vxid.procNumber, MyProc->vxid.lxid, - list_length(exportedSnapshots) + 1); + snprintf(path, sizeof(path), SNAPSHOT_EXPORT_DIR "/%08X-%08X%08X-%d", + MyProc->vxid.procNumber, (uint32) (MyProc->vxid.lxid >> 32), + (uint32) MyProc->vxid.lxid, list_length(exportedSnapshots) + 1); /* * Copy the snapshot into TopTransactionContext, add it to the @@ -1330,7 +1332,7 @@ parseXidFromText(const char *prefix, char **s, const char *filename) (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); ptr += prefixlen; - if (sscanf(ptr, "%u", &val) != 1) + if (sscanf(ptr, "%" PRIu64, &val) != 1) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); @@ -1355,7 +1357,7 @@ parseVxidFromText(const char *prefix, char **s, const char *filename, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); ptr += prefixlen; - if (sscanf(ptr, "%d/%u", &vxid->procNumber, &vxid->localTransactionId) != 2) + if (sscanf(ptr, "%d/%" PRIu64, &vxid->procNumber, &vxid->localTransactionId) != 2) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid snapshot data in file \"%s\"", filename))); @@ -1892,7 +1894,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot
snapshot) if (!snapshot->suboverflowed) { /* we have full data, so search subxip */ - if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt)) + if (pg_lfind64(xid, snapshot->subxip, snapshot->subxcnt)) return true; /* not there, fall through to search xip[] */ @@ -1914,7 +1916,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) return false; } - if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt)) + if (pg_lfind64(xid, snapshot->xip, snapshot->xcnt)) return true; } else @@ -1948,7 +1950,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) * indeterminate xid. We don't know whether it's top level or subxact * but it doesn't matter. If it's present, the xid is visible. */ - if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt)) + if (pg_lfind64(xid, snapshot->subxip, snapshot->subxcnt)) return true; } diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl index 31fa6454aee..c4ecdd1fc70 100644 --- a/src/bin/pg_amcheck/t/004_verify_heapam.pl +++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl @@ -8,6 +8,7 @@ use PostgreSQL::Test::Cluster; use PostgreSQL::Test::Utils; use Test::More; +use Data::Dumper; # This regression test demonstrates that the pg_amcheck binary correctly # identifies specific kinds of corruption within pages. To test this, we need @@ -85,6 +86,65 @@ use Test::More; use constant HEAPTUPLE_PACK_CODE => 'LLLSSSSSCCLLCCCCCCCCCCllLL'; use constant HEAPTUPLE_PACK_LENGTH => 58; # Total size +use constant HEAPPAGE_SPECIAL_PACK_CODE => 'QQ'; +use constant HEAPPAGE_SPECIAL_PACK_LENGTH => 16; +use constant HEAPPAGE_SIZE => 8192; + +# Some #define constants from access/htup_details.h for use while corrupting. 
+use constant HEAP_HASNULL => 0x0001; +use constant HEAP_XMAX_LOCK_ONLY => 0x0080; +use constant HEAP_XMIN_COMMITTED => 0x0100; +use constant HEAP_XMIN_INVALID => 0x0200; +use constant HEAP_XMAX_COMMITTED => 0x0400; +use constant HEAP_XMAX_INVALID => 0x0800; +use constant HEAP_NATTS_MASK => 0x07FF; +use constant HEAP_XMAX_IS_MULTI => 0x1000; +use constant HEAP_KEYS_UPDATED => 0x2000; +use constant HEAP_HOT_UPDATED => 0x4000; +use constant HEAP_ONLY_TUPLE => 0x8000; +use constant HEAP_UPDATED => 0x2000; + +use constant FIRST_NORMAL_TRANSACTION_ID => 3; + +# Read page special data +sub read_special_data +{ + my ($fh, $offset) = @_; + my ($buffer, %special); + + $offset -= $offset % HEAPPAGE_SIZE; + $offset += HEAPPAGE_SIZE - HEAPPAGE_SPECIAL_PACK_LENGTH; + + sysseek($fh, $offset, 0) + or BAIL_OUT("sysseek failed: $!"); + defined(sysread($fh, $buffer, HEAPPAGE_SPECIAL_PACK_LENGTH)) + or BAIL_OUT("sysread failed: $!"); + + @_ = unpack(HEAPPAGE_SPECIAL_PACK_CODE, $buffer); + %special = ( + pd_xid_base => shift, + pd_multi_base => shift); + return \%special; +} + +# Write page special data +sub write_special_data +{ + my ($fh, $offset, $special) = @_; + + $offset -= $offset % HEAPPAGE_SIZE; + $offset += HEAPPAGE_SIZE - HEAPPAGE_SPECIAL_PACK_LENGTH; + + my $buffer = pack( + HEAPPAGE_SPECIAL_PACK_CODE, + $special->{pd_xid_base}, $special->{pd_multi_base}); + + sysseek($fh, $offset, 0) + or BAIL_OUT("sysseek failed: $!"); + defined(syswrite($fh, $buffer, HEAPPAGE_SPECIAL_PACK_LENGTH)) + or BAIL_OUT("syswrite failed: $!"); + return; +} # Read a tuple of our table from a heap page. 
# @@ -96,8 +156,9 @@ use constant HEAPTUPLE_PACK_LENGTH => 58; # Total size # sub read_tuple { - my ($fh, $offset) = @_; + my ($fh, $offset, $raw) = @_; my ($buffer, %tup); + sysseek($fh, $offset, 0) or BAIL_OUT("sysseek failed: $!"); defined(sysread($fh, $buffer, HEAPTUPLE_PACK_LENGTH)) @@ -133,6 +194,18 @@ sub read_tuple c_va_toastrelid => shift); # Stitch together the text for column 'b' $tup{b} = join('', map { chr($tup{"b_body$_"}) } (1 .. 7)); + + if (!$raw) + { + my $special = read_special_data($fh, $offset); + + $tup{t_xmin} += $special->{pd_xid_base}; + my $is_multi = $tup{t_infomask} & HEAP_XMAX_IS_MULTI; + $tup{t_xmax} += !$is_multi ? + $special->{pd_xid_base} : + $special->{pd_multi_base}; + } + return \%tup; } @@ -148,7 +221,39 @@ sub read_tuple # sub write_tuple { - my ($fh, $offset, $tup) = @_; + my ($fh, $offset, $tup, $raw) = @_; + + if (!$raw) + { + my $special = read_special_data($fh, $offset); + + if ($tup->{t_xmin} >= 3) + { + my $xmin = $tup->{t_xmin} - $special->{pd_xid_base}; + die "tuple x_min $tup->{t_xmin} is too small for pd_xid_base $special->{pd_xid_base}" + if $xmin < 3; + $tup->{t_xmin} = $xmin; + } + + if ($tup->{t_xmax} >= 3) + { + if (($tup->{t_infomask} & HEAP_XMAX_IS_MULTI) == 0) + { + my $xmax = $tup->{t_xmax} - $special->{pd_xid_base}; + die "tuple x_max $tup->{t_xmax} is too small for pd_xid_base $special->{pd_xid_base}" + if $xmax < 3; + $tup->{t_xmax} = $xmax; + } + else + { + my $xmax = $tup->{t_xmax} - $special->{pd_multi_base}; + die "tuple multi x_max $tup->{t_xmax} is too small for pd_multi_base $special->{pd_multi_base}" + if $xmax < 3; + $tup->{t_xmax} = $xmax; + } + } + } + my $buffer = pack( HEAPTUPLE_PACK_CODE, $tup->{t_xmin}, $tup->{t_xmax}, @@ -171,6 +276,42 @@ sub write_tuple return; } +# move pd_xid_base and pd_multi_base to more suitable position for tests.
+sub fixup_page +{ + my ($fh, $page, $xid_base, $multi_base, $lp_off) = @_; + my $offset = $page * HEAPPAGE_SIZE; + my $special = read_special_data($fh, $offset); + + die "xid_base $xid_base should not be greater than existing $special->{pd_xid_base}" + if ($xid_base > $special->{pd_xid_base}); + die "multi_base $multi_base should not be greater than existing $special->{pd_multi_base}" + if ($multi_base > $special->{pd_multi_base} && $special->{pd_multi_base} != 0); + return if ($xid_base == $special->{pd_xid_base} && + $multi_base == $special->{pd_multi_base}); + + my $xid_delta = $special->{pd_xid_base} - $xid_base; + my $multi_delta = $special->{pd_multi_base} - $multi_base; + + for my $off (@$lp_off) + { + # change only tuples on this page. + next if ($off < $offset || $off > $offset + HEAPPAGE_SIZE); + next if ($off == -1); + + my $tup = read_tuple($fh, $off, 1); + $tup->{t_xmin} += $xid_delta; + my $is_multi = $tup->{t_infomask} & HEAP_XMAX_IS_MULTI; + $tup->{t_xmax} += !$is_multi ? $xid_delta : $multi_delta; + write_tuple($fh, $off, $tup, 1); + } + + $special->{pd_xid_base} = $xid_base; + $special->{pd_multi_base} = $multi_base; + + write_special_data($fh, $offset, $special); +} + # Set umask so test directories and files are created with default permissions umask(0077); @@ -320,6 +461,8 @@ my $relfrozenxid = $node->safe_psql('postgres', q(select relfrozenxid from pg_class where relname = 'test')); my $datfrozenxid = $node->safe_psql('postgres', q(select datfrozenxid from pg_database where datname = 'postgres')); +my $datminmxid = $node->safe_psql('postgres', + q(select datminmxid from pg_database where datname = 'postgres')); # Sanity check that our 'test' table has a relfrozenxid newer than the # datfrozenxid for the database, and that the datfrozenxid is greater than the @@ -378,6 +521,11 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++) # Determine endianness of current platform from the 1-byte varlena header $ENDIANNESS = $tup->{b_header} == 0x11 ?
"little" : "big"; } + +# Set 64bit xid bases a bit in the past therefore we can set xmin/xmax a bit +# in the past +fixup_page($file, 0, $datfrozenxid - 100, $datminmxid, \@lp_off); + close($file) or BAIL_OUT("close failed: $!"); $node->start; @@ -396,20 +544,6 @@ $node->command_ok( $node->stop; -# Some #define constants from access/htup_details.h for use while corrupting. -use constant HEAP_HASNULL => 0x0001; -use constant HEAP_XMAX_LOCK_ONLY => 0x0080; -use constant HEAP_XMIN_COMMITTED => 0x0100; -use constant HEAP_XMIN_INVALID => 0x0200; -use constant HEAP_XMAX_COMMITTED => 0x0400; -use constant HEAP_XMAX_INVALID => 0x0800; -use constant HEAP_NATTS_MASK => 0x07FF; -use constant HEAP_XMAX_IS_MULTI => 0x1000; -use constant HEAP_KEYS_UPDATED => 0x2000; -use constant HEAP_HOT_UPDATED => 0x4000; -use constant HEAP_ONLY_TUPLE => 0x8000; -use constant HEAP_UPDATED => 0x2000; - # Helper function to generate a regular expression matching the header we # expect verify_heapam() to return given which fields we expect to be non-null. sub header @@ -444,6 +578,8 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++) # Read tuple, if there is one. my $tup = $offset == -1 ? undef : read_tuple($file, $offset); + # Read page special, if there is one. + my $special = $offset == -1 ? undef : read_special_data($file, $offset); if ($offnum == 1) { @@ -460,7 +596,7 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++) elsif ($offnum == 2) { # Corruptly set xmin < datfrozenxid - my $xmin = 3; + my $xmin = $datfrozenxid - 12; $tup->{t_xmin} = $xmin; $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED; $tup->{t_infomask} &= ~HEAP_XMIN_INVALID; @@ -470,25 +606,24 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++) } elsif ($offnum == 3) { - # Corruptly set xmin < datfrozenxid, further back, noting circularity - # of xid comparison. - my $xmin = 4026531839; + # Corruptly set xmin > next transaction id. 
+ my $xmin = $relfrozenxid + 4026531839; $tup->{t_xmin} = $xmin; $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED; $tup->{t_infomask} &= ~HEAP_XMIN_INVALID; push @expected, - qr/${$header}xmin ${xmin} precedes oldest valid transaction ID \d+/; + qr/${$header}xmin ${xmin} equals or exceeds next valid transaction ID \d+/; } elsif ($offnum == 4) { - # Corruptly set xmax < relminmxid; - my $xmax = 4026531839; + # Corruptly set xmax > relminmxid; + my $xmax = $relfrozenxid + 4026531839; $tup->{t_xmax} = $xmax; $tup->{t_infomask} &= ~HEAP_XMAX_INVALID; push @expected, - qr/${$header}xmax ${xmax} precedes oldest valid transaction ID \d+/; + qr/${$header}xmax ${xmax} equals or exceeds next valid transaction ID \d+/; } elsif ($offnum == 5) { @@ -604,7 +739,7 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++) $tup->{t_xmax} = 4000000000; push @expected, - qr/${header}multitransaction ID 4000000000 precedes relation minimum multitransaction ID threshold 1/; + qr/${header}multitransaction ID 4000000000 equals or exceeds next valid multitransaction ID 1/; } elsif ($offnum == 16) # Last offnum must equal ROWCOUNT { diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 76b77963ead..cbe1aa38a20 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -258,7 +258,7 @@ main(int argc, char *argv[]) printf(_("Latest checkpoint's full_page_writes: %s\n"), ControlFile->checkPointCopy.fullPageWrites ? 
_("on") : _("off")); printf(_("Latest checkpoint's NextXID: %llu\n"), - (unsigned long long) U64FromFullTransactionId(ControlFile->checkPointCopy.nextXid)); + (unsigned long long) XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid)); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile->checkPointCopy.nextOid); printf(_("Latest checkpoint's NextMultiXactId: %llu\n"), diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 86586498747..d222fdaa1fb 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -41,6 +41,7 @@ #include "access/attnum.h" #include "access/sysattr.h" #include "access/transam.h" +#include "c.h" #include "catalog/pg_aggregate_d.h" #include "catalog/pg_am_d.h" #include "catalog/pg_attribute_d.h" @@ -3211,7 +3212,7 @@ dumpDatabase(Archive *fout) *datistemplate, *datconnlimit, *tablespace; - uint32 frozenxid, + uint64 frozenxid, minmxid; char *qdatname; @@ -3283,8 +3284,8 @@ dumpDatabase(Archive *fout) icurules = PQgetvalue(res, 0, i_daticurules); else icurules = NULL; - frozenxid = atooid(PQgetvalue(res, 0, i_frozenxid)); - minmxid = atooid(PQgetvalue(res, 0, i_minmxid)); + frozenxid = strtou64(PQgetvalue(res, 0, i_frozenxid), NULL, 0); + minmxid = strtou64(PQgetvalue(res, 0, i_minmxid), NULL, 0); dbdacl.acl = PQgetvalue(res, 0, i_datacl); dbdacl.acldefault = PQgetvalue(res, 0, i_acldefault); datistemplate = PQgetvalue(res, 0, i_datistemplate); @@ -3593,10 +3594,16 @@ dumpDatabase(Archive *fout) RelFileNumber relfilenumber; appendPQExpBuffer(loHorizonQry, "UPDATE pg_catalog.pg_class\n" - "SET relfrozenxid = '%u', relminmxid = '%u'\n" + "SET relfrozenxid = '%llu', relminmxid = '%llu'\n" "WHERE oid = %u;\n", - atooid(PQgetvalue(lo_res, i, ii_relfrozenxid)), - atooid(PQgetvalue(lo_res, i, ii_relminmxid)), + (unsigned long long) strtou64(PQgetvalue(lo_res, + i, + ii_relfrozenxid), + NULL, 0), + (unsigned long long) strtou64(PQgetvalue(lo_res, + i, + ii_relminmxid), + NULL, 0), atooid(PQgetvalue(lo_res, 
i, ii_oid))); oid = atooid(PQgetvalue(lo_res, i, ii_oid)); @@ -7234,11 +7241,11 @@ getTables(Archive *fout, int *numTables) tblinfo[i].relreplident = *(PQgetvalue(res, i, i_relreplident)); tblinfo[i].rowsec = (strcmp(PQgetvalue(res, i, i_relrowsec), "t") == 0); tblinfo[i].forcerowsec = (strcmp(PQgetvalue(res, i, i_relforcerowsec), "t") == 0); - tblinfo[i].frozenxid = atooid(PQgetvalue(res, i, i_relfrozenxid)); - tblinfo[i].toast_frozenxid = atooid(PQgetvalue(res, i, i_toastfrozenxid)); + tblinfo[i].frozenxid = strtou64(PQgetvalue(res, i, i_relfrozenxid), NULL, 0); + tblinfo[i].toast_frozenxid = strtou64(PQgetvalue(res, i, i_toastfrozenxid), NULL, 0); tblinfo[i].toast_oid = atooid(PQgetvalue(res, i, i_toastoid)); - tblinfo[i].minmxid = atooid(PQgetvalue(res, i, i_relminmxid)); - tblinfo[i].toast_minmxid = atooid(PQgetvalue(res, i, i_toastminmxid)); + tblinfo[i].minmxid = strtou64(PQgetvalue(res, i, i_relminmxid), NULL, 0); + tblinfo[i].toast_minmxid = strtou64(PQgetvalue(res, i, i_toastminmxid), NULL, 0); tblinfo[i].reloptions = pg_strdup(PQgetvalue(res, i, i_reloptions)); if (PQgetisnull(res, i, i_checkoption)) tblinfo[i].checkoption = NULL; diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index bbdb30b5f54..293efc694a7 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -315,11 +315,11 @@ typedef struct _tableInfo bool rowsec; /* is row security enabled? */ bool forcerowsec; /* is row security forced? */ bool hasoids; /* does it have OIDs? 
*/ - uint32 frozenxid; /* table's relfrozenxid */ - uint32 minmxid; /* table's relminmxid */ + uint64 frozenxid; /* table's relfrozenxid */ + uint64 minmxid; /* table's relminmxid */ Oid toast_oid; /* toast table's OID, or 0 if none */ - uint32 toast_frozenxid; /* toast table's relfrozenxid, if any */ - uint32 toast_minmxid; /* toast table's relminmxid */ + uint64 toast_frozenxid; /* toast table's relfrozenxid, if any */ + uint64 toast_minmxid; /* toast table's relminmxid */ int ncheck; /* # of CHECK expressions */ Oid reltype; /* OID of table's composite type, if any */ Oid reloftype; /* underlying type for typed table */ diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index f5599340259..c7285cfe35b 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -63,7 +63,6 @@ static ControlFileData ControlFile; /* pg_control values */ static XLogSegNo newXlogSegNo; /* new XLOG segment # */ static bool guessed = false; /* T if we had to guess at any values */ static const char *progname; -static uint32 set_xid_epoch = (uint32) -1; static TransactionId set_oldest_xid = 0; static TransactionId set_xid = 0; static TransactionId set_oldest_commit_ts_xid = 0; @@ -97,7 +96,6 @@ main(int argc, char *argv[]) static struct option long_options[] = { {"commit-timestamp-ids", required_argument, NULL, 'c'}, {"pgdata", required_argument, NULL, 'D'}, - {"epoch", required_argument, NULL, 'e'}, {"force", no_argument, NULL, 'f'}, {"next-wal-file", required_argument, NULL, 'l'}, {"multixact-ids", required_argument, NULL, 'm'}, @@ -140,7 +138,7 @@ main(int argc, char *argv[]) } - while ((c = getopt_long(argc, argv, "c:D:e:fl:m:no:O:u:x:", long_options, NULL)) != -1) + while ((c = getopt_long(argc, argv, "c:D:fl:m:no:O:u:x:", long_options, NULL)) != -1) { switch (c) { @@ -156,24 +154,9 @@ main(int argc, char *argv[]) noupdate = true; break; - case 'e': - errno = 0; - set_xid_epoch = strtoul(optarg, &endptr, 0); - if (endptr 
== optarg || *endptr != '\0' || errno != 0) - { - /*------ - translator: the second %s is a command line argument (-e, etc) */ - pg_log_error("invalid argument for option %s", "-e"); - pg_log_error_hint("Try \"%s --help\" for more information.", progname); - exit(1); - } - if (set_xid_epoch == -1) - pg_fatal("transaction ID epoch (-e) must not be -1"); - break; - case 'u': errno = 0; - set_oldest_xid = strtoul(optarg, &endptr, 0); + set_oldest_xid = strtou64(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-u"); @@ -187,7 +170,7 @@ main(int argc, char *argv[]) case 'x': errno = 0; - set_xid = strtoul(optarg, &endptr, 0); + set_xid = strtou64(optarg, &endptr, 0); if (endptr == optarg || *endptr != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-x"); @@ -201,14 +184,14 @@ main(int argc, char *argv[]) case 'c': errno = 0; - set_oldest_commit_ts_xid = strtoul(optarg, &endptr, 0); + set_oldest_commit_ts_xid = strtou64(optarg, &endptr, 0); if (endptr == optarg || *endptr != ',' || errno != 0) { pg_log_error("invalid argument for option %s", "-c"); pg_log_error_hint("Try \"%s --help\" for more information.", progname); exit(1); } - set_newest_commit_ts_xid = strtoul(endptr + 1, &endptr2, 0); + set_newest_commit_ts_xid = strtou64(endptr + 1, &endptr2, 0); if (endptr2 == endptr + 1 || *endptr2 != '\0' || errno != 0) { pg_log_error("invalid argument for option %s", "-c"); @@ -244,7 +227,7 @@ main(int argc, char *argv[]) case 'm': errno = 0; - set_mxid = strtoul(optarg, &endptr, 0); + set_mxid = strtou64(optarg, &endptr, 0); if (endptr == optarg || *endptr != ',' || errno != 0) { pg_log_error("invalid argument for option %s", "-m"); @@ -252,7 +235,7 @@ main(int argc, char *argv[]) exit(1); } - set_oldestmxid = strtoul(endptr + 1, &endptr2, 0); + set_oldestmxid = strtou64(endptr + 1, &endptr2, 0); if (endptr2 == endptr + 1 || *endptr2 != '\0' || errno != 0) { 
pg_log_error("invalid argument for option %s", "-m"); @@ -435,11 +418,6 @@ main(int argc, char *argv[]) * Adjust fields if required by switches. (Do this now so that printout, * if any, includes these values.) */ - if (set_xid_epoch != -1) - ControlFile.checkPointCopy.nextXid = - FullTransactionIdFromEpochAndXid(set_xid_epoch, - XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid)); - if (set_oldest_xid != 0) { ControlFile.checkPointCopy.oldestXid = set_oldest_xid; @@ -447,9 +425,7 @@ main(int argc, char *argv[]) } if (set_xid != 0) - ControlFile.checkPointCopy.nextXid = - FullTransactionIdFromEpochAndXid(EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid), - set_xid); + ControlFile.checkPointCopy.nextXid = FullTransactionIdFromXid(set_xid); if (set_oldest_commit_ts_xid != 0) ControlFile.checkPointCopy.oldestCommitTsXid = set_oldest_commit_ts_xid; @@ -688,7 +664,7 @@ GuessControlValues(void) ControlFile.checkPointCopy.PrevTimeLineID = 1; ControlFile.checkPointCopy.fullPageWrites = false; ControlFile.checkPointCopy.nextXid = - FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); + FullTransactionIdFromXid(FirstNormalTransactionId); ControlFile.checkPointCopy.nextOid = FirstGenbkiObjectId; ControlFile.checkPointCopy.nextMulti = FirstMultiXactId; ControlFile.checkPointCopy.nextMultiOffset = 0; @@ -739,6 +715,8 @@ GuessControlValues(void) * * NB: this display should be just those fields that will not be * reset by RewriteControlFile(). + * + * Special macros help to make translatable strings. */ static void PrintControlValues(bool guessed) @@ -758,8 +736,7 @@ PrintControlValues(bool guessed) ControlFile.checkPointCopy.ThisTimeLineID); printf(_("Latest checkpoint's full_page_writes: %s\n"), ControlFile.checkPointCopy.fullPageWrites ? 
_("on") : _("off")); - printf(_("Latest checkpoint's NextXID: %u:%llu\n"), - EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid), + printf(_("Latest checkpoint's NextXID: %llu\n"), (unsigned long long) XidFromFullTransactionId(ControlFile.checkPointCopy.nextXid)); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid); @@ -859,12 +836,6 @@ PrintNewControlValues(void) ControlFile.checkPointCopy.oldestXidDB); } - if (set_xid_epoch != -1) - { - printf(_("NextXID epoch: %u\n"), - EpochFromFullTransactionId(ControlFile.checkPointCopy.nextXid)); - } - if (set_oldest_commit_ts_xid != 0) { printf(_("oldestCommitTsXid: %llu\n"), diff --git a/src/bin/pg_resetwal/t/001_basic.pl b/src/bin/pg_resetwal/t/001_basic.pl index cc89e0764ae..c6821150b03 100644 --- a/src/bin/pg_resetwal/t/001_basic.pl +++ b/src/bin/pg_resetwal/t/001_basic.pl @@ -96,15 +96,6 @@ command_fails_like( [ 'pg_resetwal', '-c' => '10,1', $node->data_dir ], qr/greater than/, 'fails with -c value 1 part 2'); -# -e -command_fails_like( - [ 'pg_resetwal', '-e' => 'foo', $node->data_dir ], - qr/error: invalid argument for option -e/, - 'fails with incorrect -e option'); -command_fails_like( - [ 'pg_resetwal', '-e' => '-1', $node->data_dir ], - qr/must not be -1/, - 'fails with -e value -1'); # -l command_fails_like( [ 'pg_resetwal', '-l' => 'foo', $node->data_dir ], @@ -188,7 +179,6 @@ my $blcksz = $1; my @cmd = ('pg_resetwal', '--pgdata' => $node->data_dir); # some not-so-critical hardcoded values -push @cmd, '--epoch' => 1; push @cmd, '--next-wal-file' => '00000001000000320000004B'; push @cmd, '--next-oid' => 100_000; push @cmd, '--wal-segsize' => 1; @@ -212,8 +202,10 @@ push @cmd, '--commit-timestamp-ids' => sprintf("%d,%d", hex($files[0]) == 0 ? 
3 : hex($files[0]), hex($files[-1])); +my $A = 2; +my $B = 1; @files = get_slru_files('pg_multixact/offsets'); -$mult = 32 * $blcksz / 8; +$mult = $A * $blcksz / $B; # --multixact-ids argument is "new,old" push @cmd, '--multixact-ids' => sprintf("%d,%d", @@ -221,11 +213,11 @@ push @cmd, hex($files[0]) == 0 ? 1 : hex($files[0] * $mult)); @files = get_slru_files('pg_multixact/members'); -$mult = 32 * int($blcksz / 20) * 4; +$mult = $A * int($blcksz / 20) * $B; push @cmd, '--multixact-offset' => (hex($files[-1]) + 1) * $mult; @files = get_slru_files('pg_xact'); -$mult = 32 * $blcksz * 4; +$mult = $A * $blcksz * $B; push @cmd, '--oldest-transaction-id' => (hex($files[0]) == 0 ? 3 : hex($files[0]) * $mult), diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 02d9146e5ed..d1b65eaefd0 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -29,6 +29,8 @@ static void check_new_cluster_logical_replication_slots(void); static void check_new_cluster_subscription_configuration(void); static void check_old_cluster_for_valid_slots(void); static void check_old_cluster_subscription_state(void); +static void check_for_32bit_xid_usage(ClusterInfo *cluster); +static bool is_xid_wraparound(ClusterInfo *cluster); /* * DataTypesUsageChecks - definitions of data type checks for the old cluster @@ -583,7 +585,7 @@ output_check_banner(void) void -check_and_dump_old_cluster(void) +check_and_dump_old_cluster(bool *is_wraparound) { /* -- OLD -- */ @@ -677,6 +679,17 @@ check_and_dump_old_cluster(void) if (GET_MAJOR_VERSION(old_cluster.major_version) <= 905) check_for_pg_role_prefix(&old_cluster); + /* Prepare for 64bit xid */ + if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER) + { + /* Check if 32-bit xid type is used in tables */ + check_for_32bit_xid_usage(&old_cluster); + /* Check indexes to be upgraded */ + invalidate_spgist_indexes(&old_cluster, true); + invalidate_gin_indexes(&old_cluster, true); + 
invalidate_external_indexes(&old_cluster, true); + } + /* * While not a check option, we do this now because this is the only time * the old server is running. @@ -684,6 +697,8 @@ check_and_dump_old_cluster(void) if (!user_opts.check) generate_old_dump(); + *is_wraparound = is_xid_wraparound(&old_cluster); + if (!user_opts.live_check) stop_postmaster(false); } @@ -786,6 +801,17 @@ issue_warnings_and_set_wal_level(void) if (GET_MAJOR_VERSION(old_cluster.major_version) <= 906) old_9_6_invalidate_hash_indexes(&new_cluster, false); + /* Raindex for 64bit xid */ + if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER) + { + /* Check if 32-bit xid type is used in tables */ + check_for_32bit_xid_usage(&old_cluster); + /* Check indexes to be upgraded */ + invalidate_spgist_indexes(&old_cluster, true); + invalidate_gin_indexes(&old_cluster, true); + invalidate_external_indexes(&old_cluster, true); + } + report_extension_updates(&new_cluster); stop_postmaster(false); @@ -1944,6 +1970,127 @@ check_old_cluster_for_valid_slots(void) check_ok(); } +/* + * check_for_32bit_xid_usage() + * + * Current PG version changes xid storage format to 64-bit. Check if + * xid type is used in tables. + */ +static void +check_for_32bit_xid_usage(ClusterInfo *cluster) +{ + int dbnum; + FILE *script = NULL; + bool found = false; + char output_path[MAXPGPATH]; + + prep_status("Checking for incompatible \"xid\" data type"); + + snprintf(output_path, sizeof(output_path), "tables_using_xid.txt"); + + for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) + { + PGresult *res; + bool db_used = false; + int ntups; + int rowno; + int i_nspname, + i_relname, + i_attname; + DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; + PGconn *conn = connectToServer(cluster, active_db->db_name); + + /* + * While several relkinds don't store any data, e.g. views, they can + * be used to define data types of other columns, so we check all + * relkinds. 
+ */ + res = executeQueryOrDie(conn, + "SELECT n.nspname, c.relname, a.attname " + "FROM pg_catalog.pg_class c, " + " pg_catalog.pg_namespace n, " + " pg_catalog.pg_attribute a " + "WHERE c.oid = a.attrelid AND " + " a.attnum >= 1 AND " + " a.atttypid = 'pg_catalog.xid'::pg_catalog.regtype AND " + " c.relnamespace = n.oid AND " + /* exclude possible orphaned temp tables */ + " n.nspname !~ '^pg_temp_' AND " + " n.nspname NOT IN ('pg_catalog', 'information_schema')"); + + ntups = PQntuples(res); + i_nspname = PQfnumber(res, "nspname"); + i_relname = PQfnumber(res, "relname"); + i_attname = PQfnumber(res, "attname"); + for (rowno = 0; rowno < ntups; rowno++) + { + found = true; + if (script == NULL && (script = fopen_priv(output_path, "w")) == NULL) + pg_fatal("could not open file \"%s\": %s\n", + output_path, strerror(errno)); + if (!db_used) + { + fprintf(script, "Database: %s\n", active_db->db_name); + db_used = true; + } + fprintf(script, " %s.%s.%s\n", + PQgetvalue(res, rowno, i_nspname), + PQgetvalue(res, rowno, i_relname), + PQgetvalue(res, rowno, i_attname)); + } + + PQclear(res); + + PQfinish(conn); + } + + if (script) + fclose(script); + + if (found) + { + pg_log(PG_REPORT, "fatal"); + pg_fatal("Your installation contains the \"xid\" data type in user tables.\n" + "The internal format of \"xid\" changed in Postgres Pro Enterprise so this cluster\n" + "cannot currently be upgraded. Note that even dropped attributes cause a problem.\n" + "You can remove the problem tables and restart the upgrade.\n" + "A list of the problem columns is in the file:\n" + " %s", output_path); + } + else + check_ok(); +} + +/* + * is_xid_wraparound() + * + * Return true if 32-xid cluster had wraparound. 
+ */ +static bool +is_xid_wraparound(ClusterInfo *cluster) +{ + PGconn *conn; + PGresult *res; + bool is_wraparound; + + conn = connectToServer(cluster, "template1"); + + /* + * txid_current is extended with an "epoch" counter, so to check + * wraparound in old 32-xid cluster we cut epoch by casting to int4. + */ + res = executeQueryOrDie(conn, + "SELECT 1 " + "FROM pg_catalog.pg_database, txid_current() tx " + "WHERE (tx %% 4294967295)::bigint <= datfrozenxid::text::bigint " + "LIMIT 1"); + is_wraparound = PQntuples(res) ? true : false; + PQclear(res); + PQfinish(conn); + + return is_wraparound; +} + /* * Callback function for processing results of query for * check_old_cluster_subscription_state()'s UpgradeTask. If the query returned diff --git a/src/bin/pg_upgrade/controldata.c b/src/bin/pg_upgrade/controldata.c index 357f64212d6..c154f68d763 100644 --- a/src/bin/pg_upgrade/controldata.c +++ b/src/bin/pg_upgrade/controldata.c @@ -289,6 +289,8 @@ get_control_data(ClusterInfo *cluster) xid.value = strtou64(p, NULL, 10); /* + * Try to read 32-bit XID format 'epoch:xid'. + * * Delimiter changed from '/' to ':' in 9.6. 
We don't test for * the catalog version of the change because the catalog version * is pulled from pg_controldata too, and it isn't worth adding an @@ -304,8 +306,7 @@ get_control_data(ClusterInfo *cluster) if (p == NULL) { /* FullTransactionId representation */ - cluster->controldata.chkpnt_nxtxid = XidFromFullTransactionId(xid); - cluster->controldata.chkpnt_nxtepoch = EpochFromFullTransactionId(xid); + cluster->controldata.chkpnt_nxtxid = xid.value; } else { @@ -314,8 +315,8 @@ get_control_data(ClusterInfo *cluster) /* Epoch:Xid representation */ p++; /* remove '/' or ':' char */ - cluster->controldata.chkpnt_nxtxid = str2uint(p); - cluster->controldata.chkpnt_nxtepoch = (TransactionId) XidFromFullTransactionId(xid); + cluster->controldata.chkpnt_nxtxid = (XidFromFullTransactionId(xid)) << 32 | + (TransactionId) str2uint(p); } got_xid = true; @@ -339,7 +340,7 @@ get_control_data(ClusterInfo *cluster) pg_fatal("%d: controldata retrieval problem", __LINE__); p++; /* remove ':' char */ - cluster->controldata.chkpnt_nxtmulti = str2uint(p); + cluster->controldata.chkpnt_nxtmulti = strtou64(p, NULL, 10); got_multi = true; } else if ((p = strstr(bufin, "Latest checkpoint's oldestXID:")) != NULL) @@ -350,7 +351,7 @@ get_control_data(ClusterInfo *cluster) pg_fatal("%d: controldata retrieval problem", __LINE__); p++; /* remove ':' char */ - cluster->controldata.chkpnt_oldstxid = str2uint(p); + cluster->controldata.chkpnt_oldstxid = strtou64(p, NULL, 10); got_oldestxid = true; } else if ((p = strstr(bufin, "Latest checkpoint's oldestMultiXid:")) != NULL) @@ -361,7 +362,7 @@ get_control_data(ClusterInfo *cluster) pg_fatal("%d: controldata retrieval problem", __LINE__); p++; /* remove ':' char */ - cluster->controldata.chkpnt_oldstMulti = str2uint(p); + cluster->controldata.chkpnt_oldstMulti = strtou64(p, NULL, 10); got_oldestmulti = true; } else if ((p = strstr(bufin, "Latest checkpoint's NextMultiOffset:")) != NULL) @@ -372,7 +373,7 @@ get_control_data(ClusterInfo 
*cluster) pg_fatal("%d: controldata retrieval problem", __LINE__); p++; /* remove ':' char */ - cluster->controldata.chkpnt_nxtmxoff = str2uint(p); + cluster->controldata.chkpnt_nxtmxoff = strtou64(p, NULL, 10); got_mxoff = true; } else if ((p = strstr(bufin, "First log segment after reset:")) != NULL) diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c index 91ed16acb08..c078a1dc2ec 100644 --- a/src/bin/pg_upgrade/file.c +++ b/src/bin/pg_upgrade/file.c @@ -214,7 +214,8 @@ linkFile(const char *src, const char *dst, */ void rewriteVisibilityMap(const char *fromfile, const char *tofile, - const char *schemaName, const char *relName) + const char *schemaName, const char *relName, + bool update_version) { int src_fd; int dst_fd; @@ -330,6 +331,11 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile, if (old_lastpart && empty) break; + if (update_version) + PageSetPageSizeAndVersion((Page) new_vmbuf.data, + PageGetPageSize((Page) new_vmbuf.data), + PG_PAGE_LAYOUT_VERSION); + /* Set new checksum for visibility map page, if enabled */ if (new_cluster.controldata.data_checksum_version != 0) ((PageHeader) new_vmbuf.data)->pd_checksum = @@ -356,6 +362,97 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile, close(src_fd); } +/* + * updateSegmentVersion() + * + * Transform a segment file, copying from src to dst. + * schemaName/relName are relation's SQL name (used for error messages only). + * + * Read segment pages one by one and set version to PG_PAGE_LAYOUT_VERSION. + * + * Although FSM and MV formats does not change while switch to 64-bit XIDs, we + * must upgrade pages version in order to avoid lazy conversion on first read. 
+ */ +void +updateSegmentPagesVersion(const char *fromfile, const char *tofile, + const char *schemaName, const char *relName) +{ + int src_fd; + int dst_fd; + struct stat statbuf; + ssize_t src_filesize; + ssize_t totalBytesRead; + ssize_t bytesRead; + BlockNumber blkno; + PGAlignedBlock buf; + + if ((src_fd = open(fromfile, O_RDONLY | PG_BINARY, 0)) < 0) + pg_fatal("error while copying relation \"%s.%s\": could not open file \"%s\": %s", + schemaName, relName, fromfile, strerror(errno)); + + if (fstat(src_fd, &statbuf) != 0) + pg_fatal("error while copying relation \"%s.%s\": could not stat file \"%s\": %s", + schemaName, relName, fromfile, strerror(errno)); + + if ((dst_fd = open(tofile, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, + pg_file_create_mode)) < 0) + pg_fatal("error while copying relation \"%s.%s\": could not create file \"%s\": %s", + schemaName, relName, tofile, strerror(errno)); + + /* Save old file size */ + src_filesize = statbuf.st_size; + totalBytesRead = 0; + blkno = 0; + + while (totalBytesRead < src_filesize) + { + errno = 0; + if ((bytesRead = read(src_fd, buf.data, BLCKSZ)) != BLCKSZ) + { + if (bytesRead < 0) + pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s", + schemaName, relName, fromfile, strerror(errno)); + else + pg_fatal("error while copying relation \"%s.%s\": partial page found in file \"%s\"", + schemaName, relName, fromfile); + } + + totalBytesRead += BLCKSZ; + PageSetPageSizeAndVersion((Page) buf.data, + PageGetPageSize((Page) buf.data), + PG_PAGE_LAYOUT_VERSION); + + /* Set new checksum for page, if enabled */ + if (new_cluster.controldata.data_checksum_version != 0) + ((PageHeader) buf.data)->pd_checksum = + pg_checksum_page(buf.data, blkno); + + /* + * We dealing here only with FSM and VM pages. 
+ */ + if (((PageHeader) buf.data)->pd_lower != SizeOfPageHeaderData || + ((PageHeader) buf.data)->pd_upper != BLCKSZ) + pg_fatal("error while copying relation \"%s.%s\": unknown page format found in file \"%s\"", + schemaName, relName, fromfile); + + errno = 0; + if (write(dst_fd, buf.data, BLCKSZ) != BLCKSZ) + { + /* if write didn't set errno, assume problem is no disk space */ + if (errno == 0) + errno = ENOSPC; + pg_fatal("error while copying relation \"%s.%s\": could not write file \"%s\": %s", + schemaName, relName, tofile, strerror(errno)); + } + + blkno++; + } + + /* Clean up */ + close(dst_fd); + close(src_fd); +} + void check_file_clone(void) { diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index 34ee5bdb2e6..ebeb0bb6b95 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -40,6 +40,9 @@ #include +#include "access/multixact.h" +#include "access/transam.h" +#include "access/xlog_internal.h" #include "catalog/pg_class_d.h" #include "common/file_perm.h" #include "common/logging.h" @@ -58,7 +61,7 @@ static void set_new_cluster_char_signedness(void); static void set_locale_and_encoding(void); static void prepare_new_cluster(void); static void prepare_new_globals(void); -static void create_new_objects(void); +static void create_new_objects(bool is_wraparound); static void copy_xact_xlog_xid(void); static void set_frozenxids(bool minmxid_only); static void make_outputdirs(char *pgdata); @@ -85,6 +88,7 @@ int main(int argc, char **argv) { char *deletion_script_file_name = NULL; + bool is_wraparound = false; /* * pg_upgrade doesn't currently use common/logging.c, but initialize it @@ -130,7 +134,7 @@ main(int argc, char **argv) check_cluster_compatibility(); - check_and_dump_old_cluster(); + check_and_dump_old_cluster(&is_wraparound); /* -- NEW -- */ @@ -164,7 +168,7 @@ main(int argc, char **argv) prepare_new_globals(); - create_new_objects(); + create_new_objects(is_wraparound); 
stop_postmaster(false); @@ -563,7 +567,7 @@ prepare_new_globals(void) static void -create_new_objects(void) +create_new_objects(bool is_wraparound) { int dbnum; PGconn *conn_new_template1; @@ -685,11 +689,23 @@ create_new_objects(void) check_ok(); /* - * We don't have minmxids for databases or relations in pre-9.3 clusters, - * so set those after we have restored the schema. + * Refix datfrozenxid and datminmxid */ if (GET_MAJOR_VERSION(old_cluster.major_version) <= 902) set_frozenxids(true); + else if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER && + new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER) + { + /* + * During upgrade from 32-bit to 64-bit xids save relfrozenxids if + * there was no wraparound in old cluster. Otherwise, reset them to + * FirstNormalTransactionId value. + */ + if (is_wraparound) + set_frozenxids(false); + else + set_frozenxids(true); + } /* update new_cluster info now that we have objects in the databases */ get_db_rel_and_slot_infos(&new_cluster); @@ -743,14 +759,37 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir) static void copy_xact_xlog_xid(void) { - /* - * Copy old commit logs to new data dir. pg_clog has been renamed to - * pg_xact in post-10 clusters. - */ - copy_subdir_files(GET_MAJOR_VERSION(old_cluster.major_version) <= 906 ? - "pg_clog" : "pg_xact", - GET_MAJOR_VERSION(new_cluster.major_version) <= 906 ? - "pg_clog" : "pg_xact"); + TransactionId next_xid; + +#define GetClogDirName(cluster) \ + GET_MAJOR_VERSION(cluster.major_version) <= 906 ? 
"pg_clog" : "pg_xact" + + /* Set next xid to 2^32 if we're upgrading from 32 bit postgres */ + if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER && + new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER) + next_xid = ((TransactionId) 1 << 32); + else + next_xid = old_cluster.controldata.chkpnt_nxtxid; + + if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER && + new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER) + { + /* Convert commit logs and copy to the new data dir */ + prep_status("Transforming commit log segments"); + convert_xact(psprintf("%s/%s", old_cluster.pgdata, GetClogDirName(old_cluster)), + psprintf("%s/%s", new_cluster.pgdata, GetClogDirName(new_cluster))); + check_ok(); + } + else + { + /* + * Copy old commit logs to new data dir. pg_clog has been renamed to + * pg_xact in post-10 clusters. + */ + prep_status("Copying commit log segments"); + copy_subdir_files(GetClogDirName(old_cluster), GetClogDirName(new_cluster)); + check_ok(); + } prep_status("Setting oldest XID for new cluster"); exec_prog(UTILITY_LOG_FILE, NULL, true, true, @@ -764,19 +803,20 @@ copy_xact_xlog_xid(void) prep_status("Setting next transaction ID and epoch for new cluster"); exec_prog(UTILITY_LOG_FILE, NULL, true, true, "\"%s/pg_resetwal\" -f -x %llu \"%s\"", - new_cluster.bindir, - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid, + new_cluster.bindir, (unsigned long long) next_xid, new_cluster.pgdata); +#ifdef NOT_USED exec_prog(UTILITY_LOG_FILE, NULL, true, true, "\"%s/pg_resetwal\" -f -e %u \"%s\"", new_cluster.bindir, old_cluster.controldata.chkpnt_nxtepoch, new_cluster.pgdata); +#endif /* must reset commit timestamp limits also */ exec_prog(UTILITY_LOG_FILE, NULL, true, true, "\"%s/pg_resetwal\" -f -c %llu,%llu \"%s\"", new_cluster.bindir, - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid, - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid, + (unsigned long long) next_xid, + (unsigned long long) 
next_xid, new_cluster.pgdata); check_ok(); @@ -789,6 +829,10 @@ copy_xact_xlog_xid(void) if (old_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER && new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER) { + uint64 oldest_mxid = old_cluster.controldata.chkpnt_oldstMulti; + uint64 next_mxid = old_cluster.controldata.chkpnt_nxtmulti; + uint64 next_mxoff = old_cluster.controldata.chkpnt_nxtmxoff; + /* * If the old server is before the MULTIXACTOFFSET_FORMATCHANGE_CAT_VER * it must have 32-bit multixid offsets, thus it should be converted. @@ -835,9 +879,9 @@ copy_xact_xlog_xid(void) exec_prog(UTILITY_LOG_FILE, NULL, true, true, "\"%s/pg_resetwal\" -O %llu -m %llu,%llu \"%s\"", new_cluster.bindir, - (unsigned long long) old_cluster.controldata.chkpnt_nxtmxoff, - (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti, - (unsigned long long) old_cluster.controldata.chkpnt_oldstMulti, + (unsigned long long) next_mxoff, + (unsigned long long) next_mxid, + (unsigned long long) oldest_mxid, new_cluster.pgdata); check_ok(); } @@ -911,6 +955,8 @@ set_frozenxids(bool minmxid_only) int ntups; int i_datname; int i_datallowconn; + TransactionId frozen_xid; + MultiXactId minmxid; if (!minmxid_only) prep_status("Setting frozenxid and minmxid counters in new cluster"); @@ -919,18 +965,26 @@ set_frozenxids(bool minmxid_only) conn_template1 = connectToServer(&new_cluster, "template1"); + if (old_cluster.controldata.cat_ver < XID_FORMATCHANGE_CAT_VER && + new_cluster.controldata.cat_ver >= XID_FORMATCHANGE_CAT_VER) + frozen_xid = FirstNormalTransactionId; + else + frozen_xid = old_cluster.controldata.chkpnt_nxtxid; + + minmxid = old_cluster.controldata.chkpnt_nxtmulti; + if (!minmxid_only) /* set pg_database.datfrozenxid */ PQclear(executeQueryOrDie(conn_template1, "UPDATE pg_catalog.pg_database " "SET datfrozenxid = '%llu'", - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid)); + (unsigned long long) frozen_xid)); /* set pg_database.datminmxid 
*/ PQclear(executeQueryOrDie(conn_template1, "UPDATE pg_catalog.pg_database " "SET datminmxid = '%llu'", - (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti)); + (unsigned long long) minmxid)); /* get database names */ dbres = executeQueryOrDie(conn_template1, @@ -970,7 +1024,7 @@ set_frozenxids(bool minmxid_only) CppAsString2(RELKIND_RELATION) ", " CppAsString2(RELKIND_MATVIEW) ", " CppAsString2(RELKIND_TOASTVALUE) ")", - (unsigned long long) old_cluster.controldata.chkpnt_nxtxid)); + (unsigned long long) frozen_xid)); /* set pg_class.relminmxid */ PQclear(executeQueryOrDie(conn, @@ -981,7 +1035,7 @@ set_frozenxids(bool minmxid_only) CppAsString2(RELKIND_RELATION) ", " CppAsString2(RELKIND_MATVIEW) ", " CppAsString2(RELKIND_TOASTVALUE) ")", - (unsigned long long) old_cluster.controldata.chkpnt_nxtmulti)); + (unsigned long long) minmxid)); PQfinish(conn); /* Reset datallowconn flag */ diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 2d4f1d39e55..d1cc7d69eda 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -121,6 +121,11 @@ extern char *output_files[]; */ #define MULTIXACTOFFSET_FORMATCHANGE_CAT_VER 202409041 +/* + * xid format changed from 32-bit to 64-bit. 
+ */ +#define XID_FORMATCHANGE_CAT_VER 999999999 + /* * large object chunk size added to pg_controldata, * commit 5f93c37805e7485488480916b4585e098d3cc883 @@ -238,13 +243,13 @@ typedef struct uint32 ctrl_ver; uint32 cat_ver; char nextxlogfile[25]; - uint32 chkpnt_nxtxid; - uint32 chkpnt_nxtepoch; + uint64 chkpnt_nxtxid; + uint32 chkpnt_nxtepoch; /* for 32bit xids only */ uint32 chkpnt_nxtoid; - uint32 chkpnt_nxtmulti; + uint64 chkpnt_nxtmulti; uint64 chkpnt_nxtmxoff; - uint32 chkpnt_oldstMulti; - uint32 chkpnt_oldstxid; + uint64 chkpnt_oldstMulti; + uint64 chkpnt_oldstxid; uint32 align; uint32 blocksz; uint32 largesz; @@ -385,7 +390,7 @@ extern OSInfo os_info; /* check.c */ void output_check_banner(void); -void check_and_dump_old_cluster(void); +void check_and_dump_old_cluster(bool *is_wraparound); void check_new_cluster(void); void report_clusters_compatible(void); void issue_warnings_and_set_wal_level(void); @@ -428,7 +433,10 @@ void copyFileByRange(const char *src, const char *dst, void linkFile(const char *src, const char *dst, const char *schemaName, const char *relName); void rewriteVisibilityMap(const char *fromfile, const char *tofile, - const char *schemaName, const char *relName); + const char *schemaName, const char *relName, + bool update_version); +void updateSegmentPagesVersion(const char *fromfile, const char *tofile, + const char *schemaName, const char *relName); void check_file_clone(void); void check_copy_file_range(void); void check_hard_link(transferMode transfer_mode); @@ -505,6 +513,10 @@ void old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, void report_extension_updates(ClusterInfo *cluster); +void invalidate_spgist_indexes(ClusterInfo *cluster, bool check_mode); +void invalidate_gin_indexes(ClusterInfo *cluster, bool check_mode); +void invalidate_external_indexes(ClusterInfo *cluster, bool check_mode); + /* parallel.c */ void parallel_exec_prog(const char *log_file, const char *opt_log_file, const char *fmt,...) 
pg_attribute_printf(3, 4); @@ -513,6 +525,11 @@ void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr char *old_tablespace); bool reap_child(bool wait_for_child); +/* segresize.c */ + +uint64 convert_multixact_offsets(void); +void convert_xact(const char *olddir, const char *newdir); + /* task.c */ typedef void (*UpgradeTaskProcessCB) (DbInfo *dbinfo, PGresult *res, void *arg); diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c index aa205aec51d..1a7bec81fbd 100644 --- a/src/bin/pg_upgrade/relfilenumber.c +++ b/src/bin/pg_upgrade/relfilenumber.c @@ -18,7 +18,8 @@ #include "pg_upgrade.h" static void transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace); -static void transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit); +static void transfer_relfile(FileNameMap *map, const char *type_suffix, + bool vm_must_add_frozenbit, bool update_version); /* * The following set of sync_queue_* functions are used for --swap to reduce @@ -502,6 +503,7 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) { int mapnum; bool vm_must_add_frozenbit = false; + bool update_version = false; /* * Do we need to rewrite visibilitymap? @@ -524,19 +526,28 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) return; } + /* + * Need to update FSM and VM pages version to avoid lazy conversion. 
+ */ + if (old_cluster.controldata.cat_ver < new_cluster.controldata.cat_ver) + update_version = true; + for (mapnum = 0; mapnum < size; mapnum++) { if (old_tablespace == NULL || strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0) { /* transfer primary file */ - transfer_relfile(&maps[mapnum], "", vm_must_add_frozenbit); + transfer_relfile(&maps[mapnum], "", vm_must_add_frozenbit, + update_version); /* * Copy/link any fsm and vm files, if they exist */ - transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit); - transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit); + transfer_relfile(&maps[mapnum], "_fsm", vm_must_add_frozenbit, + update_version); + transfer_relfile(&maps[mapnum], "_vm", vm_must_add_frozenbit, + update_version); } } } @@ -550,7 +561,8 @@ transfer_single_new_db(FileNameMap *maps, int size, char *old_tablespace) * mode. */ static void -transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_frozenbit) +transfer_relfile(FileNameMap *map, const char *type_suffix, + bool vm_must_add_frozenbit, bool update_version) { char old_file[MAXPGPATH]; char new_file[MAXPGPATH]; @@ -614,7 +626,17 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro /* Need to rewrite visibility map format */ pg_log(PG_VERBOSE, "rewriting \"%s\" to \"%s\"", old_file, new_file); - rewriteVisibilityMap(old_file, new_file, map->nspname, map->relname); + rewriteVisibilityMap(old_file, new_file, map->nspname, + map->relname, update_version); + } + else if ((update_version && strcmp(type_suffix, "_vm") == 0) || + (update_version && strcmp(type_suffix, "_fsm") == 0)) + { + /* Need to update pages version */ + pg_log(PG_VERBOSE, "rewriting \"%s\" to \"%s\"", + old_file, new_file); + updateSegmentPagesVersion(old_file, new_file, map->nspname, + map->relname); } else switch (user_opts.transfer_mode) diff --git a/src/bin/pg_upgrade/segresize.c b/src/bin/pg_upgrade/segresize.c index 73064c77deb..37a4efbec34 100644 --- 
a/src/bin/pg_upgrade/segresize.c +++ b/src/bin/pg_upgrade/segresize.c @@ -217,6 +217,76 @@ typedef uint32 MultiXactOffsetOld; #define MULTIXACT_OFFSETS_PER_PAGE_OLD (BLCKSZ / sizeof(MultiXactOffsetOld)) #define MULTIXACT_OFFSETS_PER_PAGE_NEW (BLCKSZ / sizeof(MultiXactOffset)) +/* + * Convert pg_xact segments. + */ +void +convert_xact(const char *old_subdir, const char *new_subdir) +{ +typedef uint32 TransactionId32; +#define SLRU_PAGES_PER_SEGMENT_OLD 32 +#define SLRU_PAGES_PER_SEGMENT 32 /* Should be equal to value from slru.h */ + +#define CLOG_BITS_PER_XACT 2 +#define CLOG_XACTS_PER_BYTE 4 +#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE) + +#define MaxTransactionId32 ((TransactionId32) 0xFFFFFFFF) + + SlruSegState oldseg = {0}; + SlruSegState newseg = {0}; + TransactionId oldest_xid = old_cluster.controldata.chkpnt_oldstxid; + TransactionId next_xid = old_cluster.controldata.chkpnt_nxtxid; + TransactionId xid; + int64 pageno; + char buf[BLCKSZ] = {0}; + + oldseg.dir = (char *) old_subdir; + newseg.dir = (char *) new_subdir; + + pageno = oldest_xid / CLOG_XACTS_PER_PAGE; + + oldseg.segno = pageno / SLRU_PAGES_PER_SEGMENT_OLD; + oldseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT_OLD; + + newseg.segno = pageno / SLRU_PAGES_PER_SEGMENT; + newseg.pageno = pageno % SLRU_PAGES_PER_SEGMENT; + + if (next_xid < oldest_xid) + next_xid += (TransactionId) 1 << 32; /* wraparound */ + + /* Copy xid flags reading only needed segment pages */ + for (xid = oldest_xid & ~(CLOG_XACTS_PER_PAGE - 1); + xid <= ((next_xid - 1) & ~(CLOG_XACTS_PER_PAGE - 1)); + xid += CLOG_XACTS_PER_PAGE) + { + bool is_empty; + + /* Handle possible segment wraparound */ + if (oldseg.segno > MaxTransactionId32 / CLOG_XACTS_PER_PAGE / SLRU_PAGES_PER_SEGMENT_OLD) + { + pageno = (MaxTransactionId32 + 1) / CLOG_XACTS_PER_PAGE; + + Assert(oldseg.segno == pageno / SLRU_PAGES_PER_SEGMENT_OLD); + Assert(!oldseg.pageno); + Assert(!oldseg.file); + oldseg.segno = 0; + + Assert(newseg.segno == pageno / 
SLRU_PAGES_PER_SEGMENT); + Assert(!newseg.pageno); + Assert(!newseg.file); + newseg.segno = 0; + } + + read_old_segment_page(&oldseg, buf, &is_empty); + write_new_segment_page(&newseg, buf); + } + + /* Release resources */ + close_segment(&oldseg); + close_segment(&newseg); +} + /* * Convert pg_multixact/offsets segments and return oldest multi offset. */ diff --git a/src/bin/pg_upgrade/t/002_pg_upgrade.pl b/src/bin/pg_upgrade/t/002_pg_upgrade.pl index 00051b85035..ce58acdf5ca 100644 --- a/src/bin/pg_upgrade/t/002_pg_upgrade.pl +++ b/src/bin/pg_upgrade/t/002_pg_upgrade.pl @@ -323,6 +323,14 @@ if (defined($ENV{oldinstall})) $oldnode->append_conf('postgresql.conf', 'autovacuum = off'); $oldnode->restart; +$oldnode->safe_psql('regression', + "CREATE TABLE t1 (id SERIAL NOT NULL PRIMARY KEY, plt text, pln NUMERIC(8, 4)); + INSERT INTO t1 (plt, pln) SELECT md5(random()::text), random() * 9999 FROM generate_series(1, 1000);"); +my $relfrozenxid = $oldnode->safe_psql('regression', + "SELECT relfrozenxid FROM pg_class WHERE relname = 't1';"); +my $relminmxid = $oldnode->safe_psql('regression', + "SELECT relminmxid FROM pg_class WHERE relname = 't1';"); + # Take a dump before performing the upgrade as a base comparison. Note # that we need to use pg_dumpall from the new node here. my @dump_command = ( @@ -487,6 +495,16 @@ ok( !-d $newnode->data_dir . "/pg_upgrade_output.d", $newnode->start; +my $relfrozenxid_new = $newnode->safe_psql('regression', + "SELECT relfrozenxid FROM pg_class WHERE relname = 't1';"); + +is($relfrozenxid_new, $relfrozenxid, 'old and new relfrozenxid match after pg_upgrade'); + +my $relminmxid_new = $newnode->safe_psql('regression', + "SELECT relminmxid FROM pg_class WHERE relname = 't1';"); + +is($relminmxid_new, $relminmxid, 'old and new relminmxid match after pg_upgrade'); + # Check if there are any logs coming from pg_upgrade, that would only be # retained on failure. my $log_path = $newnode->data_dir . 
"/pg_upgrade_output.d"; diff --git a/src/bin/pg_upgrade/version.c b/src/bin/pg_upgrade/version.c index 3ad5a991a30..0443640415d 100644 --- a/src/bin/pg_upgrade/version.c +++ b/src/bin/pg_upgrade/version.c @@ -9,6 +9,7 @@ #include "postgres_fe.h" +#include "access/transam.h" #include "fe_utils/string_utils.h" #include "pg_upgrade.h" @@ -29,19 +30,21 @@ jsonb_9_4_check_applicable(ClusterInfo *cluster) } /* - * old_9_6_invalidate_hash_indexes() - * 9.6 -> 10 - * Hash index binary format has changed from 9.6->10.0 + * invalidate_indexes() + * Invalidates all indexes satisfying given predicate. */ -void -old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) +static void +invalidate_indexes(ClusterInfo *cluster, bool check_mode, + const char *name, const char *pred) { int dbnum; FILE *script = NULL; bool found = false; - char *output_path = "reindex_hash.sql"; + char output_path[MAXPGPATH]; + + snprintf(output_path, sizeof(output_path), "reindex_%s.sql", name); - prep_status("Checking for hash indexes"); + prep_status("Checking for %s indexes", name); for (dbnum = 0; dbnum < cluster->dbarr.ndbs; dbnum++) { @@ -54,9 +57,16 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) DbInfo *active_db = &cluster->dbarr.dbs[dbnum]; PGconn *conn = connectToServer(cluster, active_db->db_name); - /* find hash indexes */ - res = executeQueryOrDie(conn, - "SELECT n.nspname, c.relname " + + /* + * Find indexes satisfying predicate. + * + * System indexes (with oids < FirstNormalObjectId) are excluded from + * the search as they are recreated in the new cluster during initdb. 
+ */ + res = executeQueryOrDie( + conn, + "SELECT n.nspname, c.relname, i.indexrelid " "FROM pg_catalog.pg_class c, " " pg_catalog.pg_index i, " " pg_catalog.pg_am a, " @@ -64,8 +74,11 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) "WHERE i.indexrelid = c.oid AND " " c.relam = a.oid AND " " c.relnamespace = n.oid AND " - " a.amname = 'hash'" - ); + " i.indexrelid >= '%u'::pg_catalog.oid AND " + " %s " + "ORDER BY i.indexrelid ASC", + FirstNormalObjectId, + pred); ntups = PQntuples(res); i_nspname = PQfnumber(res, "nspname"); @@ -97,8 +110,14 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) if (!check_mode && db_used) { - /* mark hash indexes as invalid */ - PQclear(executeQueryOrDie(conn, + /* + * Mark indexes satisfying predicate as invalid. + * + * System indexes (with oids < FirstNormalObjectId) are excluded + * from the search (see above). + */ + PQclear(executeQueryOrDie( + conn, "UPDATE pg_catalog.pg_index i " "SET indisvalid = false " "FROM pg_catalog.pg_class c, " @@ -107,7 +126,10 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) "WHERE i.indexrelid = c.oid AND " " c.relam = a.oid AND " " c.relnamespace = n.oid AND " - " a.amname = 'hash'")); + " i.indexrelid >= '%u'::pg_catalog.oid AND " + " %s", + FirstNormalObjectId, + pred)); } PQfinish(conn); @@ -121,24 +143,72 @@ old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) report_status(PG_WARNING, "warning"); if (check_mode) pg_log(PG_WARNING, "\n" - "Your installation contains hash indexes. These indexes have different\n" + "Your installation contains %s indexes. These indexes have different\n" "internal formats between your old and new clusters, so they must be\n" "reindexed with the REINDEX command. After upgrading, you will be given\n" - "REINDEX instructions."); + "REINDEX instructions.", + name); + else pg_log(PG_WARNING, "\n" - "Your installation contains hash indexes. 
These indexes have different\n" + "Your installation contains %s indexes. These indexes have different\n" "internal formats between your old and new clusters, so they must be\n" "reindexed with the REINDEX command. The file\n" " %s\n" "when executed by psql by the database superuser will recreate all invalid\n" "indexes; until then, none of these indexes will be used.", + name, output_path); } else check_ok(); } +/* + * old_9_6_invalidate_hash_indexes() + * 9.6 -> 10 + * Hash index binary format has changed from 9.6->10.0 + */ +void +old_9_6_invalidate_hash_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "hash", "a.amname = 'hash'"); +} + + +/* + * invalidate_spgist_indexes() + * 32bit -> 64bit + * SP-GIST contains xids. + */ +void +invalidate_spgist_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "spgist", "a.amname = 'spgist'"); +} + +/* + * invalidate_gin_indexes() + * 32bit -> 64bit + * Gin indexes contain xids in deleted pages. + */ +void +invalidate_gin_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "gin", "a.amname = 'gin'"); +} + +/* + * invalidate_external_indexes() + * Generate script to REINDEX non-standard external indexes (like RUM etc.) + */ +void +invalidate_external_indexes(ClusterInfo *cluster, bool check_mode) +{ + invalidate_indexes(cluster, check_mode, "external", + "NOT a.amname IN ('btree', 'hash', 'gist', 'gin', 'spgist', 'brin')"); +} + /* * Callback function for processing results of query for * report_extension_updates()'s UpgradeTask. 
If the query returned any rows, diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index 76fa042113e..9481e62a5fd 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -1050,7 +1050,7 @@ main(int argc, char **argv) config.filter_by_fpw = true; break; case 'x': - if (sscanf(optarg, "%u", &config.filter_by_xid) != 1) + if (sscanf(optarg, "%" SCNu64, &config.filter_by_xid) != 1) { pg_log_error("invalid transaction ID specification: \"%s\"", optarg); diff --git a/src/bin/pg_waldump/t/001_basic.pl b/src/bin/pg_waldump/t/001_basic.pl index f26d75e01cf..19f518436d9 100644 --- a/src/bin/pg_waldump/t/001_basic.pl +++ b/src/bin/pg_waldump/t/001_basic.pl @@ -73,7 +73,8 @@ BRIN CommitTs ReplicationOrigin Generic -LogicalMessage$/, +LogicalMessage +Heap3$/, 'rmgr list'); diff --git a/src/include/access/ginblock.h b/src/include/access/ginblock.h index 4c1681068db..59d176eccfd 100644 --- a/src/include/access/ginblock.h +++ b/src/include/access/ginblock.h @@ -133,8 +133,15 @@ typedef struct GinMetaPageData * We should reclaim deleted page only once every transaction started before * its deletion is over. */ -#define GinPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid ) -#define GinPageSetDeleteXid(page, xid) ( ((PageHeader) (page))->pd_prune_xid = xid) +#define GinPageGetDeleteXid(page) ( \ + (((PageHeader) (page))->pd_upper == BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId)) ? 
\ + *((TransactionId *) ((char *) (page) + BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId))) : \ + InvalidTransactionId ) +#define GinPageSetDeleteXid(page, xid) \ + do { \ + ((PageHeader) (page))->pd_upper = BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId); \ + *((TransactionId *) ((char *) (page) + BLCKSZ - sizeof(GinPageOpaqueData) - sizeof(TransactionId))) = xid; \ + } while (false) extern bool GinPageIsRecyclable(Page page); /* diff --git a/src/include/access/gist.h b/src/include/access/gist.h index db78e60eeab..e389ebc8a5d 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -226,7 +226,7 @@ GistPageGetDeleteXid(Page page) return ((GISTDeletedPageContents *) PageGetContents(page))->deleteXid; } else - return FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); + return FullTransactionIdFromXid(FirstNormalTransactionId); } /* diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 1640d9c32f7..923c5f4e97d 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -96,6 +96,8 @@ typedef struct HeapScanDescData uint32 rs_cindex; /* current tuple's index in vistuples */ uint32 rs_ntuples; /* number of visible tuples on page */ OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */ + TransactionId rs_xmin[MaxHeapTuplesPerPage]; /* their xmins */ + TransactionId rs_xmax[MaxHeapTuplesPerPage]; /* their xmaxs */ } HeapScanDescData; typedef struct HeapScanDescData *HeapScanDesc; @@ -327,6 +329,8 @@ extern void ReleaseBulkInsertStatePin(BulkInsertState bistate); extern void heap_insert(Relation relation, HeapTuple tup, CommandId cid, int options, BulkInsertState bistate); +extern void rewrite_page_prepare_for_xid(Page page, HeapTuple tup, + bool is_toast); extern void heap_multi_insert(Relation relation, struct TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate); @@ -353,23 +357,23 @@ extern void 
heap_inplace_update_and_unlock(Relation relation, Buffer buffer); extern void heap_inplace_unlock(Relation relation, HeapTuple oldtup, Buffer buffer); -extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, +extern bool heap_prepare_freeze_tuple(HeapTuple tuple, const struct VacuumCutoffs *cutoffs, HeapPageFreeze *pagefrz, HeapTupleFreeze *frz, bool *totally_frozen); -extern void heap_pre_freeze_checks(Buffer buffer, +extern void heap_pre_freeze_checks(Relation rel, Buffer buffer, HeapTupleFreeze *tuples, int ntuples); -extern void heap_freeze_prepared_tuples(Buffer buffer, +extern void heap_freeze_prepared_tuples(Relation rel, Buffer buffer, HeapTupleFreeze *tuples, int ntuples); -extern bool heap_freeze_tuple(HeapTupleHeader tuple, +extern bool heap_freeze_tuple(HeapTuple tuple, TransactionId relfrozenxid, TransactionId relminmxid, TransactionId FreezeLimit, TransactionId MultiXactCutoff); -extern bool heap_tuple_should_freeze(HeapTupleHeader tuple, +extern bool heap_tuple_should_freeze(HeapTuple tuple, const struct VacuumCutoffs *cutoffs, TransactionId *NoFreezePageRelfrozenXid, MultiXactId *NoFreezePageRelminMxid); -extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); +extern bool heap_tuple_needs_eventual_freeze(HeapTuple tuple); extern void simple_heap_insert(Relation relation, HeapTuple tup); extern void simple_heap_delete(Relation relation, ItemPointer tid); @@ -390,12 +394,19 @@ extern void heap_page_prune_and_freeze(Relation relation, Buffer buffer, PruneReason reason, OffsetNumber *off_loc, TransactionId *new_relfrozen_xid, - MultiXactId *new_relmin_mxid); + MultiXactId *new_relmin_mxid, + bool repairFragmentation); + extern void heap_page_prune_execute(Buffer buffer, bool lp_truncate_only, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, - OffsetNumber *nowunused, int nunused); -extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets); + OffsetNumber *nowunused, int nunused, + bool 
repairFragmentation, + bool is_toast); + +extern void heap_get_root_tuples(Relation relation, Buffer buffer, Page page, + OffsetNumber *root_offsets); + extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer, TransactionId conflict_xid, bool cleanup_lock, @@ -403,7 +414,8 @@ extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer, HeapTupleFreeze *frozen, int nfrozen, OffsetNumber *redirected, int nredirected, OffsetNumber *dead, int ndead, - OffsetNumber *unused, int nunused); + OffsetNumber *unused, int nunused, + bool repairFragmentation); /* in heap/vacuumlazy.c */ struct VacuumParams; @@ -421,7 +433,7 @@ extern HTSV_Result HeapTupleSatisfiesVacuumHorizon(HeapTuple htup, Buffer buffer TransactionId *dead_after); extern void HeapTupleSetHintBits(HeapTupleHeader tuple, Buffer buffer, uint16 infomask, TransactionId xid); -extern bool HeapTupleHeaderIsOnlyLocked(HeapTupleHeader tuple); +extern bool HeapTupleIsOnlyLocked(HeapTuple htup); extern bool HeapTupleIsSurelyDead(HeapTuple htup, struct GlobalVisState *vistest); @@ -448,9 +460,10 @@ extern void HeapCheckForSerializableConflictOut(bool visible, Relation relation, * in private storage (which is what CLUSTER and friends do). 
*/ static inline void -heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz) +heap_execute_freeze_tuple(HeapTuple htup, HeapTupleFreeze *frz) { - HeapTupleHeaderSetXmax(tuple, frz->xmax); + HeapTupleHeader tuple = htup->t_data; + HeapTupleSetXmax(htup, frz->xmax); if (frz->frzflags & XLH_FREEZE_XVAC) HeapTupleHeaderSetXvac(tuple, FrozenTransactionId); @@ -462,4 +475,16 @@ heap_execute_freeze_tuple(HeapTupleHeader tuple, HeapTupleFreeze *frz) tuple->t_infomask2 = frz->t_infomask2; } +static inline void +heap_execute_freeze_tuple_page(Page page, HeapTupleHeader tuple, + HeapTupleFreeze *frz, bool is_toast) +{ + HeapTupleData htup; + + htup.t_data = tuple; + heap_execute_freeze_tuple(&htup, frz); + + HeapTupleHeaderStoreXmax(page, &htup, is_toast); +} + #endif /* HEAPAM_H */ diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index 277df6b3cf0..b5577b7c0e3 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -65,6 +65,8 @@ #define XLOG_HEAP2_LOCK_UPDATED 0x60 #define XLOG_HEAP2_NEW_CID 0x70 +#define XLOG_HEAP3_BASE_SHIFT 0x00 + /* * xl_heap_insert/xl_heap_multi_insert flag values, 8 bits are available. */ @@ -104,6 +106,7 @@ #define XLH_DELETE_CONTAINS_OLD_KEY (1<<2) #define XLH_DELETE_IS_SUPER (1<<3) #define XLH_DELETE_IS_PARTITION_MOVE (1<<4) +#define XLH_DELETE_PAGE_ON_TOAST_RELATION (1<<5) /* convenience macro for checking whether any form of old tuple was logged */ #define XLH_DELETE_CONTAINS_OLD \ @@ -282,10 +285,12 @@ typedef struct xl_heap_update * other fields require only 2-byte alignment. This is also the reason that * 'frz_offsets' is stored separately from the xlhp_freeze_plan structs. 
*/ + typedef struct xl_heap_prune { uint8 reason; - uint8 flags; + uint8 padding; + uint16 flags; /* * If XLHP_HAS_CONFLICT_HORIZON is set, the conflict horizon XID follows, @@ -293,7 +298,7 @@ typedef struct xl_heap_prune */ } xl_heap_prune; -#define SizeOfHeapPrune (offsetof(xl_heap_prune, flags) + sizeof(uint8)) +#define SizeOfHeapPrune (offsetof(xl_heap_prune, flags) + sizeof(((xl_heap_prune*)0)->flags)) /* to handle recovery conflict during logical decoding on standby */ #define XLHP_IS_CATALOG_REL (1 << 1) @@ -331,6 +336,9 @@ typedef struct xl_heap_prune #define XLHP_HAS_DEAD_ITEMS (1 << 6) #define XLHP_HAS_NOW_UNUSED_ITEMS (1 << 7) +#define XLHP_ON_TOAST_RELATION (1 << 8) +#define XLHP_REPAIR_FRAGMENTATION (1 << 9) + /* * xlhp_freeze_plan describes how to freeze a group of one or more heap tuples * (appears in xl_heap_prune's xlhp_freeze_plans sub-record) @@ -480,7 +488,19 @@ typedef struct xl_heap_rewrite_mapping XLogRecPtr start_lsn; /* Insert LSN at begin of rewrite */ } xl_heap_rewrite_mapping; -extern void HeapTupleHeaderAdvanceConflictHorizon(HeapTupleHeader tuple, +#define XLH_BASE_SHIFT_ON_TOAST_RELATION 0x01 + +/* shift the base of xids on heap page */ +typedef struct xl_heap_base_shift +{ + int64 delta; /* delta value to shift the base */ + bool multi; /* true to shift multixact base */ + uint8 flags; +} xl_heap_base_shift; + +#define SizeOfHeapBaseShift (offsetof(xl_heap_base_shift, flags) + sizeof(uint8)) + +extern void HeapTupleHeaderAdvanceConflictHorizon(HeapTuple tuple, TransactionId *snapshotConflictHorizon); extern void heap_redo(XLogReaderState *record); @@ -490,6 +510,9 @@ extern void heap_mask(char *pagedata, BlockNumber blkno); extern void heap2_redo(XLogReaderState *record); extern void heap2_desc(StringInfo buf, XLogReaderState *record); extern const char *heap2_identify(uint8 info); +extern void heap3_redo(XLogReaderState *record); +extern void heap3_desc(StringInfo buf, XLogReaderState *record); +extern const char 
*heap3_identify(uint8 info); extern void heap_xlog_logical_rewrite(XLogReaderState *r); extern XLogRecPtr log_heap_visible(Relation rel, Buffer heap_buffer, diff --git a/src/include/access/heaptoast.h b/src/include/access/heaptoast.h index 6385a27caf8..416556972fb 100644 --- a/src/include/access/heaptoast.h +++ b/src/include/access/heaptoast.h @@ -20,10 +20,19 @@ /* * Find the maximum size of a tuple if there are to be N tuples per page. */ +#if MAXIMUM_ALIGNOF == 8 #define MaximumBytesPerTuple(tuplesPerPage) \ MAXALIGN_DOWN((BLCKSZ - \ - MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData))) \ + MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData)) - MAXALIGN(sizeof(HeapPageSpecialData))) \ / (tuplesPerPage)) +#elif MAXIMUM_ALIGNOF == 4 +#define MaximumBytesPerTuple(tuplesPerPage) \ + MAXALIGN_DOWN((BLCKSZ - \ + MAXALIGN(SizeOfPageHeaderData + (tuplesPerPage) * sizeof(ItemIdData)) - MAXALIGN(sizeof(ToastPageSpecialData))) \ + / (tuplesPerPage)) +#else +#error "unknown arch bitness" +#endif /* * These symbols control toaster activation. If a tuple is larger than diff --git a/src/include/access/htup.h b/src/include/access/htup.h index f0e3aa87dc3..301b60bd643 100644 --- a/src/include/access/htup.h +++ b/src/include/access/htup.h @@ -54,6 +54,12 @@ typedef MinimalTupleData *MinimalTuple; * this can't be told apart from case #1 by inspection; code setting up * or destroying this representation has to know what it's doing. * + * t_xmin and t_xmax are TransactionId values stored in heap tuple header. + * Normally they are calculated from ShortTransactionId-sized on-disk tuple + * xmin/xmax representation: + * t_data->t_choice.t_heap.t_xmin/t_data->t_choice.t_heap.t_xmax + * and pd_xid_base and pd_multi_base common values for all tuples on a page. + * * t_len should always be valid, except in the pointer-to-nothing case. 
* t_self and t_tableOid should be valid if the HeapTupleData points to * a disk buffer, or if it represents a copy of a tuple on disk. They @@ -61,10 +67,12 @@ typedef MinimalTupleData *MinimalTuple; */ typedef struct HeapTupleData { + TransactionId t_xmin; /* calculated tuple xmin */ + TransactionId t_xmax; /* calculated tuple xmax */ uint32 t_len; /* length of *t_data */ ItemPointerData t_self; /* SelfItemPointer */ Oid t_tableOid; /* table the tuple came from */ -#define FIELDNO_HEAPTUPLEDATA_DATA 3 +#define FIELDNO_HEAPTUPLEDATA_DATA 5 HeapTupleHeader t_data; /* -> tuple header and data */ } HeapTupleData; @@ -78,12 +86,10 @@ typedef HeapTupleData *HeapTuple; #define HeapTupleIsValid(tuple) PointerIsValid(tuple) /* HeapTupleHeader functions implemented in utils/time/combocid.c */ -extern CommandId HeapTupleHeaderGetCmin(const HeapTupleHeaderData *tup); -extern CommandId HeapTupleHeaderGetCmax(const HeapTupleHeaderData *tup); -extern void HeapTupleHeaderAdjustCmax(const HeapTupleHeaderData *tup, - CommandId *cmax, bool *iscombo); +extern CommandId HeapTupleGetCmin(const HeapTupleData *tup); +extern CommandId HeapTupleGetCmax(const HeapTupleData *tup); +extern void HeapTupleAdjustCmax(const HeapTupleData *tup, CommandId *cmax, bool *iscombo); /* Prototype for HeapTupleHeader accessors in heapam.c */ -extern TransactionId HeapTupleGetUpdateXid(const HeapTupleHeaderData *tup); - +extern TransactionId HeapTupleGetUpdateXid(const HeapTupleData *tup); #endif /* HTUP_H */ diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h index aa957cf3b01..6f3baacdac2 100644 --- a/src/include/access/htup_details.h +++ b/src/include/access/htup_details.h @@ -19,6 +19,7 @@ #include "access/tupdesc.h" #include "access/tupmacs.h" #include "storage/bufpage.h" +#include "storage/bufmgr.h" #include "varatt.h" /* @@ -121,13 +122,13 @@ typedef struct HeapTupleFields { - TransactionId t_xmin; /* inserting xact ID */ - TransactionId t_xmax; /* deleting or locking 
xact ID */ + ShortTransactionId t_xmin; /* inserting xact ID */ + ShortTransactionId t_xmax; /* deleting or locking xact ID */ union { CommandId t_cid; /* inserting or deleting command ID, or both */ - TransactionId t_xvac; /* old-style VACUUM FULL xact ID */ + ShortTransactionId t_xvac; /* old-style VACUUM FULL xact ID */ } t_field3; } HeapTupleFields; @@ -223,7 +224,7 @@ struct HeapTupleHeaderData * HEAP_XMAX_LOCK_ONLY bit is set; or, for pg_upgrade's sake, if the Xmax is * not a multi and the EXCL_LOCK bit is set. * - * See also HeapTupleHeaderIsOnlyLocked, which also checks for a possible + * See also HeapTupleIsOnlyLocked, which also checks for a possible * aborted updater transaction. */ static inline bool @@ -312,30 +313,82 @@ HEAP_XMAX_IS_KEYSHR_LOCKED(int16 infomask) static bool HeapTupleHeaderXminFrozen(const HeapTupleHeaderData *tup); /* - * HeapTupleHeaderGetRawXmin returns the "raw" xmin field, which is the xid + * HeapTupleGetRawXmin returns the "raw" xmin field, which is the xid * originally used to insert the tuple. However, the tuple might actually - * be frozen (via HeapTupleHeaderSetXminFrozen) in which case the tuple's xmin + * be frozen (via HeapTupleHeaderStoreXminFrozen) in which case the tuple's xmin * is visible to every snapshot. Prior to PostgreSQL 9.4, we actually changed * the xmin to FrozenTransactionId, and that value may still be encountered * on disk. */ static inline TransactionId -HeapTupleHeaderGetRawXmin(const HeapTupleHeaderData *tup) +HeapTupleGetRawXmin(const HeapTupleData *tup) { - return tup->t_choice.t_heap.t_xmin; + return tup->t_xmin; } static inline TransactionId -HeapTupleHeaderGetXmin(const HeapTupleHeaderData *tup) +HeapTupleGetXmin(const HeapTupleData *tup) { - return HeapTupleHeaderXminFrozen(tup) ? - FrozenTransactionId : HeapTupleHeaderGetRawXmin(tup); + return HeapTupleHeaderXminFrozen(tup->t_data) ? 
+ FrozenTransactionId : HeapTupleGetRawXmin(tup); } static inline void -HeapTupleHeaderSetXmin(HeapTupleHeaderData *tup, TransactionId xid) +HeapTupleSetXmin(HeapTupleData *tup, TransactionId xid) { - tup->t_choice.t_heap.t_xmin = xid; + tup->t_xmin = xid; +} + +/* + * Functions for accessing "double xmax". On pg_upgraded instances, it might + * happen that we can't fit new special area to the page. But we still + * might need to write xmax of tuples for updates and deletes. The trick is + * that we actually don't need xmin field. After pg_upgrade (which implies + * restart) no insertions went to this page yet (otherwise special area could + * fit). So, if tuple is visible (otherwise it would be deleted), then it's + * visible for everybody. Thus, t_xmin isn't needed. Therefore, we can use + * both t_xmin and t_xmax to store 64-bit xmax. + * + * See heap_convert.c for details. + */ +static inline TransactionId +HeapTupleHeaderGetDoubleXmax(const HeapTupleHeaderData *htup) +{ + TransactionId xmax; + + xmax = htup->t_choice.t_heap.t_xmin; + xmax <<= 32; + xmax += htup->t_choice.t_heap.t_xmax; + + return xmax; +} + +static inline void +HeapTupleHeaderSetDoubleXmax(HeapTupleHeader htup, TransactionId xid) +{ + htup->t_choice.t_heap.t_xmax = xid & 0xFFFFFFFF; + htup->t_choice.t_heap.t_xmin = (xid >> 32) & 0xFFFFFFFF; +} + +static inline void +HeapTupleHeaderStoreXmin(Page page, HeapTuple htup, bool is_toast) +{ + TransactionId base; + + Assert(!HeapPageIsDoubleXmax(page)); + + base = is_toast ? 
ToastPageGetSpecial(page)->pd_xid_base : + HeapPageGetSpecial((page))->pd_xid_base; + htup->t_data->t_choice.t_heap.t_xmin = + NormalTransactionIdToShort(base, htup->t_xmin); +} + +static inline void +HeapTupleAndHeaderSetXmin(Page page, HeapTuple tup, TransactionId xid, + bool is_toast) +{ + HeapTupleSetXmin(tup, xid); + HeapTupleHeaderStoreXmin(page, tup, is_toast); } static inline bool @@ -372,48 +425,105 @@ HeapTupleHeaderSetXminInvalid(HeapTupleHeaderData *tup) } static inline void +HeapTupleHeaderStoreXminFrozen(HeapTupleHeaderData *tup) +{ + AssertMacro(!HeapTupleHeaderXminInvalid(tup)); + tup->t_infomask |= HEAP_XMIN_FROZEN; +} + +/* static inline void HeapTupleHeaderSetXminFrozen(HeapTupleHeaderData *tup) { Assert(!HeapTupleHeaderXminInvalid(tup)); tup->t_infomask |= HEAP_XMIN_FROZEN; +} */ + +static inline TransactionId +HeapTupleHeaderGetRawXmax(Page page, const HeapTupleHeaderData *htup) +{ + TransactionId base; + + if (HeapPageIsDoubleXmax(page)) + return HeapTupleHeaderGetDoubleXmax(htup); + + base = (htup->t_infomask & HEAP_XMAX_IS_MULTI) ? + HeapPageGetSpecial(page)->pd_multi_base : + HeapPageGetSpecial(page)->pd_xid_base; + return ShortTransactionIdToNormal(base, + htup->t_choice.t_heap.t_xmax); } static inline TransactionId -HeapTupleHeaderGetRawXmax(const HeapTupleHeaderData *tup) +HeapTupleGetRawXmax(const HeapTupleData *tup) +{ + return tup->t_xmax; +} + +static inline void +HeapTupleSetXmax(HeapTupleData *tup, TransactionId xid) +{ + tup->t_xmax = (xid); +} + +/* + * Set xid as xmax for HeapTupleHeader. + */ +static inline void +HeapTupleHeaderStoreXmax(Page page, HeapTuple tup, bool is_toast) { - return tup->t_choice.t_heap.t_xmax; + TransactionId base; + + if (HeapPageIsDoubleXmax(page)) + { + HeapTupleHeaderSetDoubleXmax(tup->t_data, tup->t_xmax); + return; + } + + if (is_toast) + base = ToastPageGetSpecial(page)->pd_xid_base; + else + base = (tup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) != 0 ? 
+ HeapPageGetSpecial(page)->pd_multi_base : + HeapPageGetSpecial(page)->pd_xid_base; + tup->t_data->t_choice.t_heap.t_xmax = + NormalTransactionIdToShort(base, tup->t_xmax); } +/* + * Set xid as xmax for HeadTuple and HeapTupleHeader. + */ static inline void -HeapTupleHeaderSetXmax(HeapTupleHeaderData *tup, TransactionId xid) +HeapTupleAndHeaderSetXmax(Page page, HeapTuple tup, TransactionId xid, + bool is_toast) { - tup->t_choice.t_heap.t_xmax = xid; + HeapTupleSetXmax(tup, xid); + HeapTupleHeaderStoreXmax(page, tup, is_toast); } #ifndef FRONTEND /* * HeapTupleHeaderGetRawXmax gets you the raw Xmax field. To find out the Xid * that updated a tuple, you might need to resolve the MultiXactId if certain - * bits are set. HeapTupleHeaderGetUpdateXid checks those bits and takes care + * bits are set. HeapTupleGetUpdateXidAny checks those bits and takes care * to resolve the MultiXactId if necessary. This might involve multixact I/O, * so it should only be used if absolutely necessary. */ static inline TransactionId -HeapTupleHeaderGetUpdateXid(const HeapTupleHeaderData *tup) +HeapTupleGetUpdateXidAny(const HeapTupleData *tup) { - if (!((tup)->t_infomask & HEAP_XMAX_INVALID) && - ((tup)->t_infomask & HEAP_XMAX_IS_MULTI) && - !((tup)->t_infomask & HEAP_XMAX_LOCK_ONLY)) + if (!((tup)->t_data->t_infomask & HEAP_XMAX_INVALID) && + ((tup)->t_data->t_infomask & HEAP_XMAX_IS_MULTI) && + !((tup)->t_data->t_infomask & HEAP_XMAX_LOCK_ONLY)) return HeapTupleGetUpdateXid(tup); else - return HeapTupleHeaderGetRawXmax(tup); + return HeapTupleGetRawXmax(tup); } #endif /* FRONTEND */ /* * HeapTupleHeaderGetRawCommandId will give you what's in the header whether - * it is useful or not. Most code should use HeapTupleHeaderGetCmin or - * HeapTupleHeaderGetCmax instead, but note that those Assert that you can + * it is useful or not. 
Most code should use HeapTupleGetCmin or + * HeapTupleGetCmax instead, but note that those Assert that you can * get a legitimate result, ie you are in the originating transaction! */ static inline CommandId @@ -431,7 +541,7 @@ HeapTupleHeaderSetCmin(HeapTupleHeaderData *tup, CommandId cid) tup->t_infomask &= ~HEAP_COMBOCID; } -/* SetCmax must be used after HeapTupleHeaderAdjustCmax; see combocid.c */ +/* SetCmax must be used after HeapTupleAdjustCmax; see combocid.c */ static inline void HeapTupleHeaderSetCmax(HeapTupleHeaderData *tup, CommandId cid, bool iscombo) { @@ -611,8 +721,16 @@ BITMAPLEN(int NATTS) * an otherwise-empty page can indeed hold a tuple of this size. Because * ItemIds and tuples have different alignment requirements, don't assume that * you can, say, fit 2 tuples of size MaxHeapTupleSize/2 on the same page. + * + * On shift to 64-bit XIDs MaxHeapTupleSize decreased by sizeof(HeapPageSpecialData). + * Extant tuples with length over new MaxHeapTupleSize are inherited on DoubleXmax + * pages. They could be read, but can not be updated unless their length decreases + * to fit MaxHeapTupleSize. Vacuum full will also copy these double xmax pages + * without change. */ -#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData))) + +#define MaxHeapTupleSize (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData)) - MAXALIGN(sizeof(HeapPageSpecialData))) +#define MaxHeapTupleSize_32 (BLCKSZ - MAXALIGN(SizeOfPageHeaderData + sizeof(ItemIdData))) #define MinHeapTupleSize MAXALIGN(SizeofHeapTupleHeader) /* @@ -805,6 +923,140 @@ HeapTupleClearHeapOnly(const HeapTupleData *tuple) HeapTupleHeaderClearHeapOnly(tuple->t_data); } +/* + * Copy base values for xid and multixacts from one heap tuple to heap tuple. + * Should be called on tuple copy or making desc tuple on the base on src tuple + * saving visibility information. 
+ */ +static inline void +HeapTupleCopyXids(HeapTuple dest, HeapTuple src) +{ + dest->t_xmin = src->t_xmin; + dest->t_xmax = src->t_xmax; +} + +/* + * Set base values for tuple xids/multixacts to zero. Used when visibility + * infromation is negligible or will be set later. + */ +static inline void +HeapTupleSetZeroXids(HeapTuple tup) +{ + tup->t_xmin = 0; + tup->t_xmax = 0; +} + +/* + * Copy HeapTupleHeader xmin/xmax in raw way ??? + */ +static inline void +HeapTupleCopyHeaderXids(HeapTuple tup) \ +{ + tup->t_xmin = tup->t_data->t_choice.t_heap.t_xmin; + tup->t_xmax = tup->t_data->t_choice.t_heap.t_xmax; +} + + +static inline void +HeapTupleCopyRawXminFromPage(HeapTuple tup, Page page, bool is_toast) +{ + TransactionId base; + ShortTransactionId xmin; /* short xmin from tuple header */ + + xmin = tup->t_data->t_choice.t_heap.t_xmin; + + if (!TransactionIdIsNormal(xmin)) + base = 0; + else if (is_toast) + base = ToastPageGetSpecial(page)->pd_xid_base; + else + base = HeapPageGetSpecial(page)->pd_xid_base; + + tup->t_xmin = ShortTransactionIdToNormal(base, xmin); +} + +static inline void +HeapTupleCopyXminFromPage(HeapTuple tup, Page page, bool is_toast) +{ + if (HeapTupleHeaderXminFrozen(tup->t_data)) + { + tup->t_xmin = FrozenTransactionId; + return; + } + + HeapTupleCopyRawXminFromPage(tup, page, is_toast); +} + +static inline void +HeapTupleCopyXmaxFromPage(HeapTuple tup, Page page, bool is_toast) +{ + TransactionId base; + ShortTransactionId xmax; /* short xmax from tuple header */ + + xmax = tup->t_data->t_choice.t_heap.t_xmax; + + if (!TransactionIdIsNormal(xmax)) + base = 0; + else if (is_toast) + /* + * Toast page is not expected to have multixacts in chunks and + * has shorter special. 
+ */ + base = ToastPageGetSpecial(page)->pd_xid_base; + else if (tup->t_data->t_infomask & HEAP_XMAX_IS_MULTI) + base = HeapPageGetSpecial(page)->pd_multi_base; + else + base = HeapPageGetSpecial(page)->pd_xid_base; + + tup->t_xmax = ShortTransactionIdToNormal(base, xmax); +} + +static inline void +HeapTupleCopyXidsFromPage_Basis(Buffer buffer, HeapTuple tup, Page page, + bool is_toast) +{ + Assert(IsBufferLocked(buffer)); + /* + * It is quite a rough check, but it is sufficient to detect that + * the tuple does not belong to the page. + */ + Assert( (char*) tup->t_data > (char*) page && + (char*) tup->t_data <= (char*) page + ((PageHeader) page)->pd_special - MinHeapTupleSize); + + if (HeapPageIsDoubleXmax(page)) + { + /* + * On double xmax pages, xmax is extracted from tuple header. + */ + tup->t_xmin = FrozenTransactionId; + tup->t_xmax = HeapTupleHeaderGetDoubleXmax(tup->t_data); + return; + } + + HeapTupleCopyXmaxFromPage(tup, page, is_toast); +} + +/* + * Copy base values for xid and multixacts from page to heap tuple. Should be + * called each time tuple is read from page. Otherwise, it would be impossible + * to correctly read tuple xmin and xmax. 
+ */ +static inline void +HeapTupleCopyRawXidsFromPage(Buffer buffer, HeapTuple tup, Page page, + bool is_toast) +{ + HeapTupleCopyXidsFromPage_Basis(buffer, tup, page, is_toast); + HeapTupleCopyRawXminFromPage(tup, page, is_toast); +} + +static inline void +HeapTupleCopyXidsFromPage(Buffer buffer, HeapTuple tup, Page page, + bool is_toast) +{ + HeapTupleCopyXidsFromPage_Basis(buffer, tup, page, is_toast); + HeapTupleCopyXminFromPage(tup, page, is_toast); +} + /* prototypes for functions in common/heaptuple.c */ extern Size heap_compute_data_size(TupleDesc tupleDesc, const Datum *values, const bool *isnull); diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 5ee632dfe69..aac62241318 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -18,12 +18,12 @@ /* * The first two MultiXactId values are reserved to store the truncation Xid - * and epoch of the first segment, so we start assigning multixact values from + * and base of the first segment, so we start assigning multixact values from * 2. */ -#define InvalidMultiXactId ((MultiXactId) 0) -#define FirstMultiXactId ((MultiXactId) 1) -#define MaxMultiXactId ((MultiXactId) 0xFFFFFFFF) +#define InvalidMultiXactId UINT64CONST(0) +#define FirstMultiXactId UINT64CONST(1) +#define MaxMultiXactId UINT64CONST(0xFFFFFFFFFFFFFFFF) #define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId) diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index faabcb78e7b..79415fec575 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -62,8 +62,10 @@ typedef uint16 BTCycleId; typedef struct BTPageOpaqueData { BlockNumber btpo_prev; /* left sibling, or P_NONE if leftmost */ + /* ... or next transaction ID (lower part) */ BlockNumber btpo_next; /* right sibling, or P_NONE if rightmost */ uint32 btpo_level; /* tree level --- zero for leaf pages */ + /* ... 
or next transaction ID (lower part) */ uint16 btpo_flags; /* flag bits, see below */ BTCycleId btpo_cycleid; /* vacuum cycle ID of latest split */ } BTPageOpaqueData; @@ -92,6 +94,14 @@ typedef BTPageOpaqueData *BTPageOpaque; */ #define MAX_BT_CYCLE_ID 0xFF7F +/* Macros for access xact */ +#define BTP_GET_XACT(opaque) (((uint64) ((BTPageOpaque) opaque)->btpo_prev << 32) | \ + (uint64) ((BTPageOpaque) opaque)->btpo_level) +#define BTP_SET_XACT(opaque, xact) \ +do { \ + ((BTPageOpaque) opaque)->btpo_prev = (uint32) (xact >> 32); \ + ((BTPageOpaque) opaque)->btpo_level = (uint32) xact; \ +} while (0) /* * The Meta page is always the first page in the btree index. diff --git a/src/include/access/reloptions.h b/src/include/access/reloptions.h index d0520f19d64..8938b3b0203 100644 --- a/src/include/access/reloptions.h +++ b/src/include/access/reloptions.h @@ -110,7 +110,7 @@ typedef struct relopt_int64 int64 default_val; int64 min; int64 max; -} relopt_int64; +} relopt_int64; typedef struct relopt_real { diff --git a/src/include/access/rewriteheap.h b/src/include/access/rewriteheap.h index 99c3f362adc..82cbd46b87d 100644 --- a/src/include/access/rewriteheap.h +++ b/src/include/access/rewriteheap.h @@ -51,7 +51,7 @@ typedef struct LogicalRewriteMappingData * 6) xid of the xact performing the mapping * --- */ -#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x-%x" -extern void CheckPointLogicalRewriteHeap(void); +#define LOGICAL_REWRITE_FORMAT "map-%x-%x-%X_%X-%x_%x-%x_%x" +extern void CheckPointLogicalRewriteHeap(void); #endif /* REWRITE_HEAP_H */ diff --git a/src/include/access/rmgrlist.h b/src/include/access/rmgrlist.h index 8e7fc9db877..3584a5d8cb8 100644 --- a/src/include/access/rmgrlist.h +++ b/src/include/access/rmgrlist.h @@ -47,3 +47,4 @@ PG_RMGR(RM_COMMIT_TS_ID, "CommitTs", commit_ts_redo, commit_ts_desc, commit_ts_i PG_RMGR(RM_REPLORIGIN_ID, "ReplicationOrigin", replorigin_redo, replorigin_desc, replorigin_identify, NULL, NULL, NULL, NULL) PG_RMGR(RM_GENERIC_ID, 
"Generic", generic_redo, generic_desc, generic_identify, NULL, NULL, generic_mask, NULL) PG_RMGR(RM_LOGICALMSG_ID, "LogicalMessage", logicalmsg_redo, logicalmsg_desc, logicalmsg_identify, NULL, NULL, NULL, logicalmsg_decode) +PG_RMGR(RM_HEAP3_ID, "Heap3", heap3_redo, heap3_desc, heap3_identify, NULL, NULL, heap_mask, NULL) diff --git a/src/include/access/slru.h b/src/include/access/slru.h index e142800aab2..8786d56ab84 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -26,15 +26,7 @@ /* * Define SLRU segment size. A page is the same BLCKSZ as is used everywhere * else in Postgres. The segment size can be chosen somewhat arbitrarily; - * we make it 32 pages by default, or 256Kb, i.e. 1M transactions for CLOG - * or 64K transactions for SUBTRANS. - * - * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF, - * page numbering also wraps around at 0xFFFFFFFF/xxxx_XACTS_PER_PAGE (where - * xxxx is CLOG or SUBTRANS, respectively), and segment numbering at - * 0xFFFFFFFF/xxxx_XACTS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need - * take no explicit notice of that fact in slru.c, except when comparing - * segment and page numbers in SimpleLruTruncate (see PagePrecedes()). + * we make it 32 pages by default. */ #define SLRU_PAGES_PER_SEGMENT 32 diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index b8cb1e744ad..85c5eb304d0 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -142,7 +142,7 @@ typedef enum TU_UpdateIndexes * cmax is the outdating command's CID, but only when the failure code is * TM_SelfModified (i.e., something in the current transaction outdated the * tuple); otherwise cmax is zero. (We make this restriction because - * HeapTupleHeaderGetCmax doesn't work for tuples outdated in other + * HeapTupleGetCmax doesn't work for tuples outdated in other * transactions.) 
*/ typedef struct TM_FailureData diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 7d82cd2eb56..2a365ba3195 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -17,6 +17,10 @@ #include "access/xlogdefs.h" +#ifndef FRONTEND +#include "utils/elog.h" +#endif + /* ---------------- * Special transaction ID values * @@ -28,11 +32,12 @@ * Note: if you need to change it, you must change pg_class.h as well. * ---------------- */ -#define InvalidTransactionId ((TransactionId) 0) -#define BootstrapTransactionId ((TransactionId) 1) -#define FrozenTransactionId ((TransactionId) 2) -#define FirstNormalTransactionId ((TransactionId) 3) -#define MaxTransactionId ((TransactionId) 0xFFFFFFFF) +#define InvalidTransactionId UINT64CONST(0) +#define BootstrapTransactionId UINT64CONST(1) +#define FrozenTransactionId UINT64CONST(2) +#define FirstNormalTransactionId UINT64CONST(3) +#define MaxTransactionId UINT64CONST(0xFFFFFFFFFFFFFFFF) +#define MaxShortTransactionId ((TransactionId) 0x7FFFFFFF) /* ---------------- * transaction ID manipulation macros @@ -44,8 +49,40 @@ #define TransactionIdStore(xid, dest) (*(dest) = (xid)) #define StoreInvalidTransactionId(dest) (*(dest) = InvalidTransactionId) -#define EpochFromFullTransactionId(x) ((uint32) ((x).value >> 32)) -#define XidFromFullTransactionId(x) ((uint32) (x).value) +/* + * Convert short xid from/to full xid. Assertion should fail if we full xid + * doesn't fit to xid base. 
+ */ +static inline TransactionId +ShortTransactionIdToNormal(TransactionId base, ShortTransactionId xid) +{ + if (!TransactionIdIsNormal(xid)) + return (TransactionId) xid; + +#ifndef FRONTEND + /* xid + base should not overflow TransactionId */ + Assert(xid + base >= base); +#endif + + return (TransactionId) (xid + base); +} + +static inline ShortTransactionId +NormalTransactionIdToShort(TransactionId base, TransactionId xid) +{ + if (!TransactionIdIsNormal(xid)) + return (ShortTransactionId) (xid); + +#ifndef FRONTEND + /* xid should fit ShortTransactionId */ + Assert(xid >= base + FirstNormalTransactionId && + xid <= base + MaxShortTransactionId); +#endif + + return (ShortTransactionId) (xid - base); +} + +#define XidFromFullTransactionId(x) ((x).value) #define U64FromFullTransactionId(x) ((x).value) #define FullTransactionIdEquals(a, b) ((a).value == (b).value) #define FullTransactionIdPrecedes(a, b) ((a).value < (b).value) @@ -53,8 +90,8 @@ #define FullTransactionIdFollows(a, b) ((a).value > (b).value) #define FullTransactionIdFollowsOrEquals(a, b) ((a).value >= (b).value) #define FullTransactionIdIsValid(x) TransactionIdIsValid(XidFromFullTransactionId(x)) -#define InvalidFullTransactionId FullTransactionIdFromEpochAndXid(0, InvalidTransactionId) -#define FirstNormalFullTransactionId FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId) +#define InvalidFullTransactionId FullTransactionIdFromXid(InvalidTransactionId) +#define FirstNormalFullTransactionId FullTransactionIdFromXid(FirstNormalTransactionId) #define FullTransactionIdIsNormal(x) FullTransactionIdFollowsOrEquals(x, FirstNormalFullTransactionId) /* @@ -68,21 +105,11 @@ typedef struct FullTransactionId } FullTransactionId; static inline FullTransactionId -FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid) +FullTransactionIdFromXid(TransactionId xid) { FullTransactionId result; - result.value = ((uint64) epoch) << 32 | xid; - - return result; -} - -static inline 
FullTransactionId -FullTransactionIdFromU64(uint64 value) -{ - FullTransactionId result; - - result.value = value; + result.value = xid; return result; } @@ -91,8 +118,7 @@ FullTransactionIdFromU64(uint64 value) #define TransactionIdAdvance(dest) \ do { \ (dest)++; \ - if ((dest) < FirstNormalTransactionId) \ - (dest) = FirstNormalTransactionId; \ + Assert(TransactionIdIsNormal(dest)); \ } while(0) /* @@ -140,18 +166,19 @@ FullTransactionIdAdvance(FullTransactionId *dest) /* back up a transaction ID variable, handling wraparound correctly */ #define TransactionIdRetreat(dest) \ do { \ + Assert(TransactionIdIsNormal(dest)); \ (dest)--; \ - } while ((dest) < FirstNormalTransactionId) + } while(0) /* compare two XIDs already known to be normal; this is a macro for speed */ #define NormalTransactionIdPrecedes(id1, id2) \ (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ - (int32) ((id1) - (id2)) < 0) + (int64) ((id1) - (id2)) < 0) /* compare two XIDs already known to be normal; this is a macro for speed */ #define NormalTransactionIdFollows(id1, id2) \ (AssertMacro(TransactionIdIsNormal(id1) && TransactionIdIsNormal(id2)), \ - (int32) ((id1) - (id2)) > 0) + (int64) ((id1) - (id2)) > 0) /* ---------- * Object ID (OID) zero is InvalidOid. @@ -201,10 +228,6 @@ FullTransactionIdAdvance(FullTransactionId *dest) * OID and XID assignment state. For largely historical reasons, there is * just one struct with different fields that are protected by different * LWLocks. - * - * Note: xidWrapLimit and oldestXidDB are not "active" values, but are - * used just to generate useful messages when xidWarnLimit or xidStopLimit - * are exceeded. 
*/ typedef struct TransamVariablesData { @@ -221,9 +244,6 @@ typedef struct TransamVariablesData TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */ TransactionId xidVacLimit; /* start forcing autovacuums here */ - TransactionId xidWarnLimit; /* start complaining here */ - TransactionId xidStopLimit; /* refuse to advance nextXid beyond here */ - TransactionId xidWrapLimit; /* where the world ends */ Oid oldestXidDB; /* database with minimum datfrozenxid */ /* @@ -274,10 +294,6 @@ extern bool TransactionIdDidAbort(TransactionId transactionId); extern void TransactionIdCommitTree(TransactionId xid, int nxids, TransactionId *xids); extern void TransactionIdAsyncCommitTree(TransactionId xid, int nxids, TransactionId *xids, XLogRecPtr lsn); extern void TransactionIdAbortTree(TransactionId xid, int nxids, TransactionId *xids); -extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2); -extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2); -extern bool TransactionIdFollows(TransactionId id1, TransactionId id2); -extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2); extern TransactionId TransactionIdLatest(TransactionId mainxid, int nxids, const TransactionId *xids); extern XLogRecPtr TransactionIdGetCommitLSN(TransactionId xid); @@ -319,7 +335,7 @@ ReadNextTransactionId(void) /* return transaction ID backed up by amount, handling wraparound correctly */ static inline TransactionId -TransactionIdRetreatedBy(TransactionId xid, uint32 amount) +TransactionIdRetreatedBy(TransactionId xid, uint64 amount) { xid -= amount; @@ -370,49 +386,6 @@ FullTransactionIdNewer(FullTransactionId a, FullTransactionId b) return b; } -/* - * Compute FullTransactionId for the given TransactionId, assuming xid was - * between [oldestXid, nextXid] at the time when TransamVariables->nextXid was - * nextFullXid. 
When adding calls, evaluate what prevents xid from preceding - * oldestXid if SetTransactionIdLimit() runs between the collection of xid and - * the collection of nextFullXid. - */ -static inline FullTransactionId -FullTransactionIdFromAllowableAt(FullTransactionId nextFullXid, - TransactionId xid) -{ - uint32 epoch; - - /* Special transaction ID. */ - if (!TransactionIdIsNormal(xid)) - return FullTransactionIdFromEpochAndXid(0, xid); - - Assert(TransactionIdPrecedesOrEquals(xid, - XidFromFullTransactionId(nextFullXid))); - - /* - * The 64 bit result must be <= nextFullXid, since nextFullXid hadn't been - * issued yet when xid was in the past. The xid must therefore be from - * the epoch of nextFullXid or the epoch before. We know this because we - * must remove (by freezing) an XID before assigning the XID half an epoch - * ahead of it. - * - * The unlikely() branch hint is dubious. It's perfect for the first 2^32 - * XIDs of a cluster's life. Right at 2^32 XIDs, misprediction shoots to - * 100%, then improves until perfection returns 2^31 XIDs later. Since - * current callers pass relatively-recent XIDs, expect >90% prediction - * accuracy overall. This favors average latency over tail latency. - */ - epoch = EpochFromFullTransactionId(nextFullXid); - if (unlikely(xid > XidFromFullTransactionId(nextFullXid))) - { - Assert(epoch != 0); - epoch--; - } - - return FullTransactionIdFromEpochAndXid(epoch, xid); -} - #endif /* FRONTEND */ #endif /* TRANSAM_H */ diff --git a/src/include/access/tupmacs.h b/src/include/access/tupmacs.h index 6240ec930e7..73abbb0ec9f 100644 --- a/src/include/access/tupmacs.h +++ b/src/include/access/tupmacs.h @@ -152,10 +152,11 @@ fetch_att(const void *T, bool attbyval, int attlen) ((attalign) == TYPALIGN_INT) ? INTALIGN(cur_offset) : \ (((attalign) == TYPALIGN_CHAR) ? (uintptr_t) (cur_offset) : \ (((attalign) == TYPALIGN_DOUBLE) ? DOUBLEALIGN(cur_offset) : \ + (((attalign) == TYPALIGN_XID) ? 
MAXALIGN(cur_offset) : \ ( \ AssertMacro((attalign) == TYPALIGN_SHORT), \ SHORTALIGN(cur_offset) \ - ))) \ + )))) \ ) /* diff --git a/src/include/access/xact.h b/src/include/access/xact.h index b2bc10ee041..a8392a80019 100644 --- a/src/include/access/xact.h +++ b/src/include/access/xact.h @@ -249,7 +249,7 @@ typedef struct xl_xact_xinfo * Commit records can be large, so copying large portions isn't * attractive. */ - uint32 xinfo; + uint64 xinfo; } xl_xact_xinfo; typedef struct xl_xact_dbinfo @@ -308,7 +308,12 @@ typedef struct xl_xact_invals typedef struct xl_xact_twophase { - TransactionId xid; + /* + * TransactionId is split into 32-bit parts because xl_xact_twophase is + * only int-aligned. + */ + uint32 xid_lo; + uint32 xid_hi; } xl_xact_twophase; typedef struct xl_xact_origin @@ -327,7 +332,7 @@ typedef struct xl_xact_commit /* xl_xact_relfilelocators follows if XINFO_HAS_RELFILELOCATORS */ /* xl_xact_stats_items follows if XINFO_HAS_DROPPED_STATS */ /* xl_xact_invals follows if XINFO_HAS_INVALS */ - /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ + /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE (xid is int-aligned!) */ /* twophase_gid follows if XINFO_HAS_GID. As a null-terminated string. */ /* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! */ } xl_xact_commit; @@ -343,7 +348,7 @@ typedef struct xl_xact_abort /* xl_xact_relfilelocators follows if XINFO_HAS_RELFILELOCATORS */ /* xl_xact_stats_items follows if XINFO_HAS_DROPPED_STATS */ /* No invalidation messages needed. */ - /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE */ + /* xl_xact_twophase follows if XINFO_HAS_TWOPHASE (xid is int-aligned!) */ /* twophase_gid follows if XINFO_HAS_GID. As a null-terminated string. */ /* xl_xact_origin follows if XINFO_HAS_ORIGIN, stored unaligned! 
*/ } xl_xact_abort; diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h index cf057f033a2..04e00380fc5 100644 --- a/src/include/access/xloginsert.h +++ b/src/include/access/xloginsert.h @@ -39,6 +39,7 @@ #define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image * is taken */ #define REGBUF_NO_CHANGE 0x20 /* intentionally register clean buffer */ +#define REGBUF_CONVERTED 0x40 /* buffer had format convertion */ /* prototypes for public functions in xloginsert.c: */ extern void XLogBeginInsert(void); diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index 9738462d3c9..a3305586993 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -427,10 +427,6 @@ extern bool DecodeXLogRecord(XLogReaderState *state, #define XLogRecHasBlockData(decoder, block_id) \ ((decoder)->record->blocks[block_id].has_data) -#ifndef FRONTEND -extern FullTransactionId XLogRecGetFullXid(XLogReaderState *record); -#endif - extern bool RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page); extern char *XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len); extern void XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id, diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h index a06833ce0a3..a0944e4a51d 100644 --- a/src/include/access/xlogrecord.h +++ b/src/include/access/xlogrecord.h @@ -41,18 +41,17 @@ typedef struct XLogRecord { uint32 xl_tot_len; /* total len of entire record */ + pg_crc32c xl_crc; /* CRC for this record */ TransactionId xl_xid; /* xact id */ XLogRecPtr xl_prev; /* ptr to previous record in log */ uint8 xl_info; /* flag bits, see below */ RmgrId xl_rmid; /* resource manager for this record */ - /* 2 bytes of padding here, initialize to zero */ - pg_crc32c xl_crc; /* CRC for this record */ /* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */ } XLogRecord; -#define SizeOfXLogRecord 
(offsetof(XLogRecord, xl_crc) + sizeof(pg_crc32c)) +#define SizeOfXLogRecord (offsetof(XLogRecord, xl_rmid) + sizeof(RmgrId)) /* * The high 4 bits in xl_info may be used freely by rmgr. The diff --git a/src/include/c.h b/src/include/c.h index 68d31deb278..7f7fc02ab0e 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -620,20 +620,30 @@ typedef double float8; typedef Oid regproc; typedef regproc RegProcedure; -typedef uint32 TransactionId; +typedef uint64 TransactionId; -typedef uint32 LocalTransactionId; +extern bool TransactionIdPrecedes(TransactionId id1, TransactionId id2); +extern bool TransactionIdPrecedesOrEquals(TransactionId id1, TransactionId id2); +extern bool TransactionIdFollows(TransactionId id1, TransactionId id2); +extern bool TransactionIdFollowsOrEquals(TransactionId id1, TransactionId id2); -typedef uint32 SubTransactionId; +typedef uint32 ShortTransactionId; +typedef uint64 LocalTransactionId; +typedef uint64 SubTransactionId; -#define InvalidSubTransactionId ((SubTransactionId) 0) -#define TopSubTransactionId ((SubTransactionId) 1) +#define InvalidSubTransactionId ((SubTransactionId) 0) +#define TopSubTransactionId ((SubTransactionId) 1) /* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */ typedef TransactionId MultiXactId; typedef uint64 MultiXactOffset; +#define MAX_START_XID UINT64CONST(0x3FFFFFFFFFFFFFFF) /* 2^62 - 1 */ +#define StartTransactionIdIsValid(xid) ((xid) <= MAX_START_XID) +#define StartMultiXactIdIsValid(mxid) ((mxid) <= MAX_START_XID) +#define StartMultiXactOffsetIsValid(mxoff) ((mxoff) <= MAX_START_XID) + typedef uint32 CommandId; #define FirstCommandId ((CommandId) 0) @@ -806,7 +816,6 @@ typedef NameData *Name; /* we don't currently need wider versions of the other ALIGN macros */ #define MAXALIGN64(LEN) TYPEALIGN64(MAXIMUM_ALIGNOF, (LEN)) - /* ---------------------------------------------------------------- * Section 6: assertions * ---------------------------------------------------------------- diff 
--git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index d32758981e1..cde665fae0e 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,7 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202503261 +/* XXX: should de changed to actual version on commit */ +#define CATALOG_VERSION_NO 999999999 #endif diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat index 6d9dc1528d6..6b1f1fdfed7 100644 --- a/src/include/catalog/pg_operator.dat +++ b/src/include/catalog/pg_operator.dat @@ -183,16 +183,16 @@ oprresult => 'bool', oprcom => '=(xid,xid)', oprnegate => '<>(xid,xid)', oprcode => 'xideq', oprrest => 'eqsel', oprjoin => 'eqjoinsel' }, { oid => '353', descr => 'equal', - oprname => '=', oprleft => 'xid', oprright => 'int4', oprresult => 'bool', - oprnegate => '<>(xid,int4)', oprcode => 'xideqint4', oprrest => 'eqsel', + oprname => '=', oprleft => 'xid', oprright => 'int8', oprresult => 'bool', + oprnegate => '<>(xid,int8)', oprcode => 'xideqint8', oprrest => 'eqsel', oprjoin => 'eqjoinsel' }, { oid => '3315', descr => 'not equal', oprname => '<>', oprleft => 'xid', oprright => 'xid', oprresult => 'bool', oprcom => '<>(xid,xid)', oprnegate => '=(xid,xid)', oprcode => 'xidneq', oprrest => 'neqsel', oprjoin => 'neqjoinsel' }, { oid => '3316', descr => 'not equal', - oprname => '<>', oprleft => 'xid', oprright => 'int4', oprresult => 'bool', - oprnegate => '=(xid,int4)', oprcode => 'xidneqint4', oprrest => 'neqsel', + oprname => '<>', oprleft => 'xid', oprright => 'int8', oprresult => 'bool', + oprnegate => '=(xid,int8)', oprcode => 'xidneqint8', oprrest => 'neqsel', oprjoin => 'neqjoinsel' }, { oid => '5068', descr => 'equal', oprname => '=', oprcanmerge => 't', oprcanhash => 't', oprleft => 'xid8', diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index df0370256dc..dd36a8938ed 100644 --- a/src/include/catalog/pg_proc.dat +++ 
b/src/include/catalog/pg_proc.dat @@ -2471,10 +2471,10 @@ { oid => '1181', descr => 'age of a transaction ID, in transactions before current transaction', proname => 'age', provolatile => 's', proparallel => 'r', - prorettype => 'int4', proargtypes => 'xid', prosrc => 'xid_age' }, + prorettype => 'int8', proargtypes => 'xid', prosrc => 'xid_age' }, { oid => '3939', descr => 'age of a multi-transaction ID, in multi-transactions before current multi-transaction', - proname => 'mxid_age', provolatile => 's', prorettype => 'int4', + proname => 'mxid_age', provolatile => 's', prorettype => 'int8', proargtypes => 'xid', prosrc => 'mxid_age' }, { oid => '1188', @@ -2825,11 +2825,11 @@ prosrc => 'bpcharlen' }, { oid => '1319', - proname => 'xideqint4', proleakproof => 't', prorettype => 'bool', - proargtypes => 'xid int4', prosrc => 'xideq' }, + proname => 'xideqint8', proleakproof => 't', prorettype => 'bool', + proargtypes => 'xid int8', prosrc => 'xideq' }, { oid => '3309', - proname => 'xidneqint4', proleakproof => 't', prorettype => 'bool', - proargtypes => 'xid int4', prosrc => 'xidneq' }, + proname => 'xidneqint8', proleakproof => 't', prorettype => 'bool', + proargtypes => 'xid int8', prosrc => 'xidneq' }, { oid => '1326', proname => 'interval_div', prorettype => 'interval', diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat index 6dca77e0a22..959ead556d4 100644 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -95,9 +95,9 @@ typinput => 'tidin', typoutput => 'tidout', typreceive => 'tidrecv', typsend => 'tidsend', typalign => 's' }, { oid => '28', array_type_oid => '1011', descr => 'transaction id', - typname => 'xid', typlen => '4', typbyval => 't', typcategory => 'U', + typname => 'xid', typlen => '8', typbyval => 'FLOAT8PASSBYVAL', typcategory => 'U', typinput => 'xidin', typoutput => 'xidout', typreceive => 'xidrecv', - typsend => 'xidsend', typalign => 'i' }, + typsend => 'xidsend', typalign => 'x' }, { 
oid => '29', array_type_oid => '1012', descr => 'command identifier type, sequence in transaction id', typname => 'cid', typlen => '4', typbyval => 't', typcategory => 'U', diff --git a/src/include/catalog/pg_type.h b/src/include/catalog/pg_type.h index ff666711a54..a52133e01b6 100644 --- a/src/include/catalog/pg_type.h +++ b/src/include/catalog/pg_type.h @@ -303,6 +303,11 @@ MAKE_SYSCACHE(TYPENAMENSP, pg_type_typname_nsp_index, 64); #define TYPALIGN_SHORT 's' /* short alignment (typically 2 bytes) */ #define TYPALIGN_INT 'i' /* int alignment (typically 4 bytes) */ #define TYPALIGN_DOUBLE 'd' /* double alignment (often 8 bytes) */ +/* + * We need to use alignment suitable for 8-byte XID values. + * On systems like AIX double alignment (4 bytes) is not enough. + */ +#define TYPALIGN_XID 'x' #define TYPSTORAGE_PLAIN 'p' /* type not prepared for toasting */ #define TYPSTORAGE_EXTERNAL 'e' /* toastable, don't try to compress */ diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index bc37a80dc74..bfabcbbf55b 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -216,12 +216,12 @@ typedef enum VacOptValue */ typedef struct VacuumParams { - bits32 options; /* bitmask of VACOPT_* */ - int freeze_min_age; /* min freeze age, -1 to use default */ - int freeze_table_age; /* age at which to scan whole table */ - int multixact_freeze_min_age; /* min multixact freeze age, -1 to + bits32 options; /* bitmask of VacuumOption */ + int64 freeze_min_age; /* min freeze age, -1 to use default */ + int64 freeze_table_age; /* age at which to scan whole table */ + int64 multixact_freeze_min_age; /* min multixact freeze age, -1 to * use default */ - int multixact_freeze_table_age; /* multixact age at which to scan + int64 multixact_freeze_table_age; /* multixact age at which to scan * whole table */ bool is_wraparound; /* force a for-wraparound vacuum */ int log_min_duration; /* minimum execution threshold in ms at @@ -297,12 +297,12 @@
typedef struct VacDeadItemsInfo /* GUC parameters */ extern PGDLLIMPORT int default_statistics_target; /* PGDLLIMPORT for PostGIS */ -extern PGDLLIMPORT int vacuum_freeze_min_age; -extern PGDLLIMPORT int vacuum_freeze_table_age; -extern PGDLLIMPORT int vacuum_multixact_freeze_min_age; -extern PGDLLIMPORT int vacuum_multixact_freeze_table_age; -extern PGDLLIMPORT int vacuum_failsafe_age; -extern PGDLLIMPORT int vacuum_multixact_failsafe_age; +extern PGDLLIMPORT int64 vacuum_freeze_min_age; +extern PGDLLIMPORT int64 vacuum_freeze_table_age; +extern PGDLLIMPORT int64 vacuum_multixact_freeze_min_age; +extern PGDLLIMPORT int64 vacuum_multixact_freeze_table_age; +extern PGDLLIMPORT int64 vacuum_failsafe_age; +extern PGDLLIMPORT int64 vacuum_multixact_failsafe_age; extern PGDLLIMPORT bool track_cost_delay_timing; extern PGDLLIMPORT bool vacuum_truncate; diff --git a/src/include/fmgr.h b/src/include/fmgr.h index 82ee38b31e5..0be6a4d5f6b 100644 --- a/src/include/fmgr.h +++ b/src/include/fmgr.h @@ -281,6 +281,7 @@ extern struct varlena *pg_detoast_datum_packed(struct varlena *datum); #define PG_GETARG_FLOAT4(n) DatumGetFloat4(PG_GETARG_DATUM(n)) #define PG_GETARG_FLOAT8(n) DatumGetFloat8(PG_GETARG_DATUM(n)) #define PG_GETARG_INT64(n) DatumGetInt64(PG_GETARG_DATUM(n)) +#define PG_GETARG_TRANSACTIONID(n) DatumGetTransactionId(PG_GETARG_DATUM(n)) /* use this if you want the raw, possibly-toasted input datum: */ #define PG_GETARG_RAW_VARLENA_P(n) ((struct varlena *) PG_GETARG_POINTER(n)) /* use this if you want the input datum de-toasted: */ @@ -367,6 +368,7 @@ extern struct varlena *pg_detoast_datum_packed(struct varlena *datum); #define PG_RETURN_FLOAT8(x) return Float8GetDatum(x) #define PG_RETURN_INT64(x) return Int64GetDatum(x) #define PG_RETURN_UINT64(x) return UInt64GetDatum(x) +#define PG_RETURN_TRANSACTIONID(x) return TransactionIdGetDatum(x) /* RETURN macros for other pass-by-ref types will typically look like this: */ #define PG_RETURN_BYTEA_P(x) PG_RETURN_POINTER(x) 
#define PG_RETURN_TEXT_P(x) PG_RETURN_POINTER(x) diff --git a/src/include/nodes/pg_list.h b/src/include/nodes/pg_list.h index 4d1cdbbcfdd..136745d932e 100644 --- a/src/include/nodes/pg_list.h +++ b/src/include/nodes/pg_list.h @@ -46,6 +46,7 @@ typedef union ListCell { void *ptr_value; int int_value; + int64 int64_value; Oid oid_value; TransactionId xid_value; } ListCell; @@ -171,6 +172,7 @@ list_length(const List *l) */ #define lfirst(lc) ((lc)->ptr_value) #define lfirst_int(lc) ((lc)->int_value) +#define lfirst_int64(lc) ((lc)->int64_value) #define lfirst_oid(lc) ((lc)->oid_value) #define lfirst_xid(lc) ((lc)->xid_value) #define lfirst_node(type,lc) castNode(type, lfirst(lc)) @@ -197,6 +199,7 @@ list_length(const List *l) #define llast(l) lfirst(list_last_cell(l)) #define llast_int(l) lfirst_int(list_last_cell(l)) +#define llast_int64(l) lfirst_int64(list_last_cell(l)) #define llast_oid(l) lfirst_oid(list_last_cell(l)) #define llast_xid(l) lfirst_xid(list_last_cell(l)) #define llast_node(type,l) castNode(type, llast(l)) @@ -610,6 +613,7 @@ extern List *list_make5_impl(NodeTag t, ListCell datum1, ListCell datum2, pg_nodiscard extern List *lappend(List *list, void *datum); pg_nodiscard extern List *lappend_int(List *list, int datum); +pg_nodiscard extern List *lappend_int64(List *list, int64 datum); pg_nodiscard extern List *lappend_oid(List *list, Oid datum); pg_nodiscard extern List *lappend_xid(List *list, TransactionId datum); diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index db6454090d2..2f8cbc9b768 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -739,6 +739,9 @@ # endif #endif +/* Postgres Pro uses 64-bit xids */ +#undef XID_IS_64BIT + /* Size of a WAL file block. This need have no particular relation to BLCKSZ.
XLOG_BLCKSZ must be a power of 2, and if your system supports O_DIRECT I/O, XLOG_BLCKSZ must be a multiple of the alignment requirement for direct-I/O diff --git a/src/include/port/pg_lfind.h b/src/include/port/pg_lfind.h index 20f7497dcb7..164acbc73dd 100644 --- a/src/include/port/pg_lfind.h +++ b/src/include/port/pg_lfind.h @@ -206,4 +206,66 @@ pg_lfind32(uint32 key, const uint32 *base, uint32 nelem) #endif } +/* + * pg_lfind64 + * + * Return true if there is an element in 'base' that equals 'key', otherwise + * return false. + */ +static inline bool +pg_lfind64(uint64 key, uint64 *base, uint32 nelem) +{ + uint32 i, + iterations; +#if defined(USE_ASSERT_CHECKING) + bool assert_result = false; + + /* pre-compute the result for assert checking */ + for (i = 0; i < nelem; ++i) + { + if (key == base[i]) + { + assert_result = true; + break; + } + } +#endif + +#define UNROLL_FACTOR 8 + StaticAssertStmt((UNROLL_FACTOR & (UNROLL_FACTOR - 1)) == 0, + "Loop unroll factor must be power of 2"); + iterations = nelem & ~(UNROLL_FACTOR - 1); + for (i = 0; i < iterations; i += UNROLL_FACTOR) + { + if (base[0] == key || base[1] == key || base[2] == key || + base[3] == key || base[4] == key || base[5] == key || + base[6] == key || base[7] == key) + { +#if defined(USE_ASSERT_CHECKING) + Assert(assert_result == true); +#endif + return true; + } + base += UNROLL_FACTOR; + } + + /* Process the remaining elements one at a time. 
*/ + iterations = nelem & (UNROLL_FACTOR - 1); + for (i = 0; i < iterations; ++i) + { + if (key == *base++) + { +#if defined(USE_ASSERT_CHECKING) + Assert(assert_result == true); +#endif + return true; + } + } + +#if defined(USE_ASSERT_CHECKING) + Assert(assert_result == false); +#endif + return false; +} + #endif /* PG_LFIND_H */ diff --git a/src/include/postgres.h b/src/include/postgres.h index 8a41a668687..133f40e3ff9 100644 --- a/src/include/postgres.h +++ b/src/include/postgres.h @@ -85,6 +85,9 @@ typedef struct NullableDatum #define SIZEOF_DATUM SIZEOF_VOID_P +static uint64 DatumGetUInt64(Datum X); +static Datum UInt64GetDatum(uint64 X); + /* * DatumGetBool * Returns boolean value of a datum. @@ -266,7 +269,7 @@ ObjectIdGetDatum(Oid X) static inline TransactionId DatumGetTransactionId(Datum X) { - return (TransactionId) X; + return DatumGetUInt64(X); } /* @@ -276,7 +279,7 @@ DatumGetTransactionId(Datum X) static inline Datum TransactionIdGetDatum(TransactionId X) { - return (Datum) X; + return UInt64GetDatum(X); } /* @@ -286,7 +289,7 @@ TransactionIdGetDatum(TransactionId X) static inline Datum MultiXactIdGetDatum(MultiXactId X) { - return (Datum) X; + return UInt64GetDatum(X); } /* diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h index d80817adc8e..7c1bfe097ea 100644 --- a/src/include/postmaster/autovacuum.h +++ b/src/include/postmaster/autovacuum.h @@ -39,8 +39,8 @@ extern PGDLLIMPORT int64 autovacuum_vac_ins_thresh; extern PGDLLIMPORT double autovacuum_vac_ins_scale; extern PGDLLIMPORT int64 autovacuum_anl_thresh; extern PGDLLIMPORT double autovacuum_anl_scale; -extern PGDLLIMPORT int autovacuum_freeze_max_age; -extern PGDLLIMPORT int autovacuum_multixact_freeze_max_age; +extern PGDLLIMPORT int64 autovacuum_freeze_max_age; +extern PGDLLIMPORT int64 autovacuum_multixact_freeze_max_age; extern PGDLLIMPORT double autovacuum_vac_cost_delay; extern PGDLLIMPORT int autovacuum_vac_cost_limit; diff --git 
a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 9327f60c44c..032bed11880 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -32,8 +32,8 @@ * Buffer state is a single 32-bit variable where following data is combined. * * - 18 bits refcount - * - 4 bits usage count - * - 10 bits of flags + * - 3 bits usage count + * - 11 bits of flags * * Combining these values allows to perform some operations without locking * the buffer header, by modifying them together with a CAS loop. @@ -41,8 +41,8 @@ * The definition of buffer state components is below. */ #define BUF_REFCOUNT_BITS 18 -#define BUF_USAGECOUNT_BITS 4 -#define BUF_FLAG_BITS 10 +#define BUF_USAGECOUNT_BITS 3 +#define BUF_FLAG_BITS 11 StaticAssertDecl(BUF_REFCOUNT_BITS + BUF_USAGECOUNT_BITS + BUF_FLAG_BITS == 32, "parts of buffer state space need to equal 32"); @@ -64,6 +64,7 @@ StaticAssertDecl(BUF_REFCOUNT_BITS + BUF_USAGECOUNT_BITS + BUF_FLAG_BITS == 32, * Note: BM_TAG_VALID essentially means that there is a buffer hashtable * entry associated with the buffer's tag. 
*/ +#define BM_CONVERTED (1U << 21) /* buffer was converted to 64-bit xid format */ +#define BM_LOCKED (1U << 22) /* buffer header is locked */ +#define BM_DIRTY (1U << 23) /* data needs writing */ +#define BM_VALID (1U << 24) /* data is valid */ diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 538b890a51d..0481d1bff24 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -280,8 +280,12 @@ extern void BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum, BlockNumber *blknum); extern void MarkBufferDirtyHint(Buffer buffer, bool buffer_std); +extern void MarkBufferConverted(Buffer buffer, bool converted); +extern bool IsBufferConverted(Buffer buffer); extern void UnlockBuffers(void); +extern bool IsBufferLocked(Buffer buffer); +extern bool IsBufferLockedExclusive(Buffer buffer); extern void LockBuffer(Buffer buffer, int mode); extern bool ConditionalLockBuffer(Buffer buffer); extern void LockBufferForCleanup(Buffer buffer); @@ -317,6 +321,8 @@ extern int GetAccessStrategyPinLimit(BufferAccessStrategy strategy); extern void FreeAccessStrategy(BufferAccessStrategy strategy); +/* old tuple format support */ +extern void convert_page(Relation rel, Buffer buf, BlockNumber blkno); /* inline functions */ diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index 6646b6f6371..0bc51b3ec52 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -14,10 +14,13 @@ #ifndef BUFPAGE_H #define BUFPAGE_H +#include "access/transam.h" #include "access/xlogdefs.h" #include "storage/block.h" #include "storage/item.h" #include "storage/off.h" +#include "postgres.h" +#include "utils/rel.h" /* GUC variable */ extern PGDLLIMPORT bool ignore_checksum_failure; @@ -167,12 +170,41 @@ typedef struct PageHeaderData LocationIndex pd_upper; /* offset to end of free space */ LocationIndex pd_special; /* offset to start of special space */ uint16 pd_pagesize_version; - TransactionId
pd_prune_xid; /* oldest prunable XID, or zero if none */ + ShortTransactionId pd_prune_xid; /* oldest prunable XID, or zero if + * none */ ItemIdData pd_linp[FLEXIBLE_ARRAY_MEMBER]; /* line pointer array */ } PageHeaderData; typedef PageHeaderData *PageHeader; + +/* + * HeapPageSpecialData -- data that is stored at the end of each heap page. + * + * pd_xid_base - base value for transaction IDs on page + * pd_multi_base - base value for multixact IDs on page + * + * pd_xid_base and pd_multi_base are base values for calculation of transaction + * identifiers from t_xmin and t_xmax in each heap tuple header on the page. + */ +typedef struct HeapPageSpecialData +{ + TransactionId pd_xid_base; /* base value for transaction IDs on page */ + TransactionId pd_multi_base; /* base value for multixact IDs on page */ +} HeapPageSpecialData; + +typedef HeapPageSpecialData *HeapPageSpecial; + +typedef struct ToastPageSpecialData +{ + TransactionId pd_xid_base; /* base value for transaction IDs on page */ +} ToastPageSpecialData; + +typedef ToastPageSpecialData *ToastPageSpecial; + +extern PGDLLIMPORT HeapPageSpecial heapDoubleXmaxSpecial; +extern PGDLLIMPORT ToastPageSpecial toastDoubleXmaxSpecial; + /* * pd_flags contains the following flag bits. Undefined bits are initialized * to zero and may be used in the future. @@ -204,7 +236,7 @@ typedef PageHeaderData *PageHeader; * As of Release 9.3, the checksum version must also be considered when * handling pages. */ -#define PG_PAGE_LAYOUT_VERSION 4 +#define PG_PAGE_LAYOUT_VERSION 5 #define PG_DATA_CHECKSUM_VERSION 1 /* ---------------------------------------------------------------- @@ -443,18 +475,177 @@ PageClearAllVisible(Page page) } /* - * These two require "access/transam.h", so left as macros.
*/ -#define PageSetPrunable(page, xid) \ -do { \ - Assert(TransactionIdIsNormal(xid)); \ - if (!TransactionIdIsValid(((PageHeader) (page))->pd_prune_xid) || \ - TransactionIdPrecedes(xid, ((PageHeader) (page))->pd_prune_xid)) \ - ((PageHeader) (page))->pd_prune_xid = (xid); \ -} while (0) -#define PageClearPrunable(page) \ - (((PageHeader) (page))->pd_prune_xid = InvalidTransactionId) +static inline bool +HeapPageIsDoubleXmax(const PageData *page) +{ + return ((PageHeader) (page))->pd_special == BLCKSZ; +} +/* + * Get pointer to HeapPageSpecialData. + * + * Can be used for non-consistent reads from non-locked pages. + * + * Return doubleXmaxSpecial when pd_special == BLCKSZ (i.e. "double xmax" + * format). + */ +static inline HeapPageSpecial +HeapPageGetSpecialNoAssert(Page page) +{ + if (HeapPageIsDoubleXmax(page)) + return heapDoubleXmaxSpecial; + + return (HeapPageSpecial) ((char *) page + + ((PageHeader) page)->pd_special); +} + +/* + * Get pointer to ToastPageSpecialData. + * + * Can be used for non-consistent reads from non-locked pages. + * + * Return doubleXmaxSpecial when pd_special == BLCKSZ (i.e. "double xmax" + * format). + */ +static inline ToastPageSpecial +ToastPageGetSpecialNoAssert(Page page) +{ + if (HeapPageIsDoubleXmax(page)) + return toastDoubleXmaxSpecial; + + return (ToastPageSpecial) ((char *) page + + ((PageHeader) page)->pd_special); +} + +/* + * Wrapper for HeapPageGetSpecialNoAssert for general use. + */ +static inline HeapPageSpecial +HeapPageGetSpecial(Page page) +{ + if (HeapPageIsDoubleXmax(page)) + return heapDoubleXmaxSpecial; + + Assert(((PageHeader) page)->pd_special == + BLCKSZ - MAXALIGN(sizeof(HeapPageSpecialData))); + + return (HeapPageSpecial) ((char *) page + + ((PageHeader) page)->pd_special); +} + +/* + * Wrapper for ToastPageGetSpecialNoAssert for general use. 
+ */ +static inline ToastPageSpecial +ToastPageGetSpecial(Page page) +{ + if (HeapPageIsDoubleXmax(page)) + return toastDoubleXmaxSpecial; + + Assert(((PageHeader) page)->pd_special == + BLCKSZ - MAXALIGN(sizeof(ToastPageSpecialData))); + + return (ToastPageSpecial) ((char *) page + + ((PageHeader) page)->pd_special); +} + +/* + * Set pd_prune_xid. + */ +static inline void +HeapPageSetPruneXid(Page page, TransactionId xid, bool is_toast) +{ + TransactionId base; + + if (HeapPageIsDoubleXmax(page)) + return; + + if (!TransactionIdIsNormal(xid)) + { + ((PageHeader) (page))->pd_prune_xid = xid; + return; + } + + base = is_toast ? ToastPageGetSpecial(page)->pd_xid_base : + HeapPageGetSpecial(page)->pd_xid_base; + + ((PageHeader) (page))->pd_prune_xid = NormalTransactionIdToShort(base, xid); + Assert(((PageHeader) (page))->pd_prune_xid <= MaxShortTransactionId); +} + +static inline void +ToastPageSetPruneXid(Page page, TransactionId xid) +{ + if (HeapPageIsDoubleXmax(page)) + return; + + if (!TransactionIdIsNormal(xid)) + { + ((PageHeader) (page))->pd_prune_xid = xid; + return; + } + + ((PageHeader) (page))->pd_prune_xid = + NormalTransactionIdToShort(ToastPageGetSpecial(page)->pd_xid_base, (xid)); + + Assert(((PageHeader) (page))->pd_prune_xid <= MaxShortTransactionId); +} + +/* + * Get pd_prune_xid from locked page. + */ +static inline TransactionId +HeapPageGetPruneXid(Page page, bool is_toast) +{ + TransactionId base; + + if (HeapPageIsDoubleXmax(page)) + return ((PageHeader) (page))->pd_prune_xid; + + base = is_toast ? 
ToastPageGetSpecial(page)->pd_xid_base : + HeapPageGetSpecial(page)->pd_xid_base; + + return ShortTransactionIdToNormal(base, + ((PageHeader) (page))->pd_prune_xid); +} + +static inline void +PageSetPrunable(Page page, TransactionId xid, bool is_toast) +{ + TransactionId prune_xid; + + Assert(TransactionIdIsNormal(xid)); + + if (HeapPageIsDoubleXmax(page)) + return; + + prune_xid = HeapPageGetPruneXid(page, is_toast); + if ((!TransactionIdIsValid(prune_xid) || + TransactionIdPrecedes(xid, prune_xid))) + { + HeapPageSetPruneXid(page, xid, is_toast); + } +} + +/* + * Get pd_prune_xid from non-locked page. May return an invalid value, but doesn't + * cause assert failures. + */ +static inline TransactionId +HeapPageGetPruneXidNoAssert(Page page, bool is_toast) +{ + TransactionId base; + + if (HeapPageIsDoubleXmax(page)) + return ((PageHeader) (page))->pd_prune_xid; + + base = is_toast ? ToastPageGetSpecialNoAssert(page)->pd_xid_base : + HeapPageGetSpecialNoAssert(page)->pd_xid_base; + return ShortTransactionIdToNormal(base, + ((PageHeader) (page))->pd_prune_xid); +} /* ---------------------------------------------------------------- * extern declarations @@ -488,6 +679,21 @@ do { \ StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)), "BLCKSZ has to be a multiple of sizeof(size_t)"); +/* + * Tuple defrag support for PageRepairFragmentation and PageIndexMultiDelete + */ +typedef struct ItemIdCompactData +{ + uint16 offsetindex; /* linp array index */ + int16 itemoff; /* page offset of item data */ + uint16 alignedlen; /* MAXALIGN(item data len) */ +} ItemIdCompactData; + +typedef ItemIdCompactData *ItemIdCompact; +typedef RelationData *Relation; + +extern int itemoffcompare(const void *item1, const void *item2); + extern void PageInit(Page page, Size pageSize, Size specialSize); extern bool PageIsVerifiedExtended(PageData *page, BlockNumber blkno, int flags); extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size, @@ -496,7 +702,7 @@
extern Page PageGetTempPage(const PageData *page); extern Page PageGetTempPageCopy(const PageData *page); extern Page PageGetTempPageCopySpecial(const PageData *page); extern void PageRestoreTempPage(Page tempPage, Page oldPage); -extern void PageRepairFragmentation(Page page); +extern void PageRepairFragmentation(Page page, bool is_toast); extern void PageTruncateLinePointerArray(Page page); extern Size PageGetFreeSpace(const PageData *page); extern Size PageGetFreeSpaceForMultipleTuples(const PageData *page, int ntups); diff --git a/src/include/storage/itemid.h b/src/include/storage/itemid.h index bfefacaab5c..ce216c1f868 100644 --- a/src/include/storage/itemid.h +++ b/src/include/storage/itemid.h @@ -78,6 +78,8 @@ typedef uint16 ItemLength; #define ItemIdGetRedirect(itemId) \ ((itemId)->lp_off) +#define ItemIdGetTupleEnd(itemId) \ + (MAXALIGN(ItemIdGetLength((itemId))) + ItemIdGetOffset((itemId))) /* * ItemIdIsValid * True iff item identifier is valid. diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index ad4e40badbe..47a541a7047 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -225,8 +225,8 @@ typedef struct LOCKTAG /* ID info for a transaction is its TransactionId */ #define SET_LOCKTAG_TRANSACTION(locktag,xid) \ - ((locktag).locktag_field1 = (xid), \ - (locktag).locktag_field2 = 0, \ + ((locktag).locktag_field1 = (uint32)((xid) & 0xFFFFFFFF), \ + (locktag).locktag_field2 = (uint32)((xid) >> 32), \ (locktag).locktag_field3 = 0, \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_TRANSACTION, \ @@ -235,8 +235,8 @@ typedef struct LOCKTAG /* ID info for a virtual transaction is its VirtualTransactionId */ #define SET_LOCKTAG_VIRTUALTRANSACTION(locktag,vxid) \ ((locktag).locktag_field1 = (vxid).procNumber, \ - (locktag).locktag_field2 = (vxid).localTransactionId, \ - (locktag).locktag_field3 = 0, \ + (locktag).locktag_field2 = (uint32)((vxid).localTransactionId & 0xFFFFFFFF), \ + 
(locktag).locktag_field3 = (uint32)((vxid).localTransactionId >> 32), \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_VIRTUALTRANSACTION, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) @@ -246,9 +246,9 @@ typedef struct LOCKTAG * its speculative insert counter. */ #define SET_LOCKTAG_SPECULATIVE_INSERTION(locktag,xid,token) \ - ((locktag).locktag_field1 = (xid), \ - (locktag).locktag_field2 = (token), \ - (locktag).locktag_field3 = 0, \ + ((locktag).locktag_field1 = (uint32)((xid) & 0xFFFFFFFF), \ + (locktag).locktag_field2 = (uint32)((xid) >> 32), \ + (locktag).locktag_field3 = (token), \ (locktag).locktag_field4 = 0, \ (locktag).locktag_type = LOCKTAG_SPECULATIVE_TOKEN, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index f51b03d3822..1cd0a19acfb 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -17,6 +17,7 @@ #include "access/clog.h" #include "access/xlogdefs.h" #include "lib/ilist.h" +#include "port/atomics.h" #include "storage/latch.h" #include "storage/lock.h" #include "storage/pg_sema.h" @@ -176,12 +177,12 @@ struct PGPROC Latch procLatch; /* generic latch for process */ - TransactionId xid; /* id of top-level transaction currently being + pg_atomic_uint64 xid; /* id of top-level transaction currently being * executed by this proc, if running and XID * is assigned; else InvalidTransactionId. * mirrored in ProcGlobal->xids[pgxactoff] */ - TransactionId xmin; /* minimal running XID as it was when we were + pg_atomic_uint64 xmin; /* minimal running XID as it was when we were * starting our xact, excluding LAZY VACUUM: * vacuum must not remove tuples deleted by * xid >= xmin ! 
*/ @@ -378,7 +379,7 @@ typedef struct PROC_HDR PGPROC *allProcs; /* Array mirroring PGPROC.xid for each PGPROC currently in the procarray */ - TransactionId *xids; + pg_atomic_uint64 *xids; /* * Array mirroring PGPROC.subxidStatus for each PGPROC currently in the diff --git a/src/include/utils/combocid.h b/src/include/utils/combocid.h index 4a2069b2869..4fcb899fcda 100644 --- a/src/include/utils/combocid.h +++ b/src/include/utils/combocid.h @@ -15,7 +15,7 @@ #define COMBOCID_H /* - * HeapTupleHeaderGetCmin and HeapTupleHeaderGetCmax function prototypes + * HeapTupleGetCmin and HeapTupleGetCmax function prototypes * are in access/htup.h, because that's where the macro definitions that * those functions replaced used to be. */ diff --git a/src/include/utils/xid8.h b/src/include/utils/xid8.h index 35399b7f6fb..4185e5c34f0 100644 --- a/src/include/utils/xid8.h +++ b/src/include/utils/xid8.h @@ -17,13 +17,13 @@ static inline FullTransactionId DatumGetFullTransactionId(Datum X) { - return FullTransactionIdFromU64(DatumGetUInt64(X)); + return FullTransactionIdFromXid(DatumGetUInt64(X)); } static inline Datum FullTransactionIdGetDatum(FullTransactionId X) { - return UInt64GetDatum(U64FromFullTransactionId(X)); + return UInt64GetDatum(XidFromFullTransactionId(X)); } #define PG_GETARG_FULLTRANSACTIONID(X) DatumGetFullTransactionId(PG_GETARG_DATUM(X)) diff --git a/src/pl/plperl/plperl.c b/src/pl/plperl/plperl.c index ebf55fe663c..4c41db64bf3 100644 --- a/src/pl/plperl/plperl.c +++ b/src/pl/plperl/plperl.c @@ -2675,7 +2675,7 @@ validate_plperl_function(plperl_proc_ptr *proc_ptr, HeapTuple procTup) * This is needed because CREATE OR REPLACE FUNCTION can modify the * function's pg_proc entry without changing its OID. 
************************************************************/ - uptodate = (prodesc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + uptodate = (prodesc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self)); if (uptodate) @@ -2799,7 +2799,7 @@ compile_plperl_function(Oid fn_oid, bool is_trigger, bool is_event_trigger) MemoryContextSetIdentifier(proc_cxt, prodesc->proname); prodesc->fn_cxt = proc_cxt; prodesc->fn_refcount = 0; - prodesc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + prodesc->fn_xmin = HeapTupleGetRawXmin(procTup); prodesc->fn_tid = procTup->t_self; prodesc->nargs = procStruct->pronargs; prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo)); diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index 6fdba95962d..6b7c4e054d9 100644 --- a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -168,7 +168,7 @@ recheck: if (function) { /* We have a compiled function, but is it still valid? 
*/ - if (function->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + if (function->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&function->fn_tid, &procTup->t_self)) function_valid = true; else @@ -349,7 +349,7 @@ do_compile(FunctionCallInfo fcinfo, function->fn_signature = format_procedure(fcinfo->flinfo->fn_oid); MemoryContextSetIdentifier(func_cxt, function->fn_signature); function->fn_oid = fcinfo->flinfo->fn_oid; - function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + function->fn_xmin = HeapTupleGetRawXmin(procTup); function->fn_tid = procTup->t_self; function->fn_input_collation = fcinfo->fncollation; function->fn_cxt = func_cxt; diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c index bb99781c56e..3ce78ff16dc 100644 --- a/src/pl/plpgsql/src/pl_exec.c +++ b/src/pl/plpgsql/src/pl_exec.c @@ -7566,6 +7566,7 @@ deconstruct_composite_datum(Datum value, HeapTupleData *tmptup) tmptup->t_len = HeapTupleHeaderGetDatumLength(td); ItemPointerSetInvalid(&(tmptup->t_self)); tmptup->t_tableOid = InvalidOid; + HeapTupleSetZeroXids(tmptup); tmptup->t_data = td; /* Extract rowtype info and find a tupdesc */ @@ -7740,6 +7741,7 @@ exec_move_row_from_datum(PLpgSQL_execstate *estate, tmptup.t_len = HeapTupleHeaderGetDatumLength(td); ItemPointerSetInvalid(&(tmptup.t_self)); tmptup.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tmptup); tmptup.t_data = td; /* Extract rowtype info */ diff --git a/src/pl/plpython/plpy_procedure.c b/src/pl/plpython/plpy_procedure.c index b494eeb474f..ac1fc85fe17 100644 --- a/src/pl/plpython/plpy_procedure.c +++ b/src/pl/plpython/plpy_procedure.c @@ -175,7 +175,7 @@ PLy_procedure_create(HeapTuple procTup, Oid fn_oid, bool is_trigger) proc->proname = pstrdup(NameStr(procStruct->proname)); MemoryContextSetIdentifier(cxt, proc->proname); proc->pyname = pstrdup(procName); - proc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + proc->fn_xmin = HeapTupleGetRawXmin(procTup); proc->fn_tid = 
procTup->t_self; proc->fn_readonly = (procStruct->provolatile != PROVOLATILE_VOLATILE); proc->is_setof = procStruct->proretset; @@ -418,7 +418,7 @@ PLy_procedure_valid(PLyProcedure *proc, HeapTuple procTup) return false; /* If the pg_proc tuple has changed, it's not valid */ - if (!(proc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + if (!(proc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&proc->fn_tid, &procTup->t_self))) return false; diff --git a/src/pl/tcl/pltcl.c b/src/pl/tcl/pltcl.c index 08c8492050e..7e59ea9160d 100644 --- a/src/pl/tcl/pltcl.c +++ b/src/pl/tcl/pltcl.c @@ -1455,7 +1455,7 @@ compile_pltcl_function(Oid fn_oid, Oid tgreloid, ************************************************************/ if (prodesc != NULL && prodesc->internal_proname != NULL && - prodesc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) && + prodesc->fn_xmin == HeapTupleGetRawXmin(procTup) && ItemPointerEquals(&prodesc->fn_tid, &procTup->t_self)) { /* It's still up-to-date, so we can use it */ @@ -1589,7 +1589,7 @@ compile_pltcl_function(Oid fn_oid, Oid tgreloid, prodesc->internal_proname = pstrdup(internal_proname); prodesc->fn_cxt = proc_cxt; prodesc->fn_refcount = 0; - prodesc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + prodesc->fn_xmin = HeapTupleGetRawXmin(procTup); prodesc->fn_tid = procTup->t_self; prodesc->nargs = procStruct->pronargs; prodesc->arg_out_func = (FmgrInfo *) palloc0(prodesc->nargs * sizeof(FmgrInfo)); diff --git a/src/test/Makefile b/src/test/Makefile index 511a72e6238..b986d0e324e 100644 --- a/src/test/Makefile +++ b/src/test/Makefile @@ -12,7 +12,8 @@ subdir = src/test top_builddir = ../.. 
include $(top_builddir)/src/Makefile.global -SUBDIRS = perl postmaster regress isolation modules authentication recovery subscription +SUBDIRS = perl postmaster regress isolation modules authentication recovery subscription \ + xid-64 ifeq ($(with_icu),yes) SUBDIRS += icu diff --git a/src/test/meson.build b/src/test/meson.build index ccc31d6a86a..4680141c92b 100644 --- a/src/test/meson.build +++ b/src/test/meson.build @@ -8,6 +8,7 @@ subdir('postmaster') subdir('recovery') subdir('subscription') subdir('modules') +subdir('xid-64') if ssl.found() subdir('ssl') diff --git a/src/test/modules/injection_points/regress_injection.c b/src/test/modules/injection_points/regress_injection.c index 422f4168935..7bfab0dc6c3 100644 --- a/src/test/modules/injection_points/regress_injection.c +++ b/src/test/modules/injection_points/regress_injection.c @@ -66,6 +66,5 @@ removable_cutoff(PG_FUNCTION_ARGS) if (rel) table_close(rel, AccessShareLock); - PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromAllowableAt(next_fxid, - xid)); + PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromXid(xid)); } diff --git a/src/test/modules/test_lfind/test_lfind.c b/src/test/modules/test_lfind/test_lfind.c index 8dcaa8f9fda..b1e7f3f71e9 100644 --- a/src/test/modules/test_lfind/test_lfind.c +++ b/src/test/modules/test_lfind/test_lfind.c @@ -120,29 +120,29 @@ Datum test_lfind32(PG_FUNCTION_ARGS) { #define TEST_ARRAY_SIZE 135 - uint32 test_array[TEST_ARRAY_SIZE] = {0}; + uint64 test_array[TEST_ARRAY_SIZE] = {0}; test_array[8] = 1; test_array[64] = 2; test_array[TEST_ARRAY_SIZE - 1] = 3; - if (pg_lfind32(1, test_array, 4)) - elog(ERROR, "pg_lfind32() found nonexistent element"); - if (!pg_lfind32(1, test_array, TEST_ARRAY_SIZE)) - elog(ERROR, "pg_lfind32() did not find existing element"); + if (pg_lfind64(1, test_array, 4)) + elog(ERROR, "pg_lfind64() found nonexistent element"); + if (!pg_lfind64(1, test_array, TEST_ARRAY_SIZE)) + elog(ERROR, "pg_lfind64() did not find existing element"); - if 
(pg_lfind32(2, test_array, 32)) - elog(ERROR, "pg_lfind32() found nonexistent element"); - if (!pg_lfind32(2, test_array, TEST_ARRAY_SIZE)) - elog(ERROR, "pg_lfind32() did not find existing element"); + if (pg_lfind64(2, test_array, 32)) + elog(ERROR, "pg_lfind64() found nonexistent element"); + if (!pg_lfind64(2, test_array, TEST_ARRAY_SIZE)) + elog(ERROR, "pg_lfind64() did not find existing element"); - if (pg_lfind32(3, test_array, 96)) - elog(ERROR, "pg_lfind32() found nonexistent element"); - if (!pg_lfind32(3, test_array, TEST_ARRAY_SIZE)) - elog(ERROR, "pg_lfind32() did not find existing element"); + if (pg_lfind64(3, test_array, 96)) + elog(ERROR, "pg_lfind64() found nonexistent element"); + if (!pg_lfind64(3, test_array, TEST_ARRAY_SIZE)) + elog(ERROR, "pg_lfind64() did not find existing element"); - if (pg_lfind32(4, test_array, TEST_ARRAY_SIZE)) - elog(ERROR, "pg_lfind32() found nonexistent element"); + if (pg_lfind64(4, test_array, TEST_ARRAY_SIZE)) + elog(ERROR, "pg_lfind64() found nonexistent element"); PG_RETURN_VOID(); } diff --git a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm index 81a8f44aa9f..430d1d2ffad 100644 --- a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm +++ b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm @@ -132,6 +132,10 @@ sub adjust_database_contents 'drop table public.gtest_normal_child2'); } + # Can't upgrade xid type + _add_st($result, 'regression', + 'alter table public.tab_core_types drop column xid'); + # stuff not supported from release 14 if ($old_version < 14) { diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index 8759ed2cbba..f62b5082d70 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -3155,7 +3155,7 @@ sub advance_wal_to_record_splitting_zone my ($self, $wal_block_size) = @_; # Size of record header. 
- my $RECORD_HEADER_SIZE = 24; + my $RECORD_HEADER_SIZE = 26; my $end_lsn = $self->_get_insert_lsn(); my $page_offset = $end_lsn % $wal_block_size; diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl index 0ae2e982727..43fab8070ce 100644 --- a/src/test/recovery/t/003_recovery_targets.pl +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -57,7 +57,7 @@ $node_primary->init(has_archiving => 1, allows_streaming => 1); # Bump the transaction ID epoch. This is useful to stress the portability # of recovery_target_xid parsing. -system_or_bail('pg_resetwal', '--epoch' => '1', $node_primary->data_dir); +system_or_bail('pg_resetwal', $node_primary->data_dir); # Start it $node_primary->start; diff --git a/src/test/recovery/t/039_end_of_wal.pl b/src/test/recovery/t/039_end_of_wal.pl index 47f9bb15e03..ebf2cf9fde8 100644 --- a/src/test/recovery/t/039_end_of_wal.pl +++ b/src/test/recovery/t/039_end_of_wal.pl @@ -70,17 +70,21 @@ sub build_record_header # This needs to follow the structure XLogRecord: # I for xl_tot_len - # I for xl_xid + # I for xl_crc + # II for xl_xid # II for xl_prev # C for xl_info # C for xl_rmid - # BB for two bytes of padding - # I for xl_crc - return pack("IIIICCBBI", - $xl_tot_len, $xl_xid, + # BBBBBB for two bytes of padding + return pack("IIIIIICCBBBBBB", + $xl_tot_len, + $xl_crc, + $BIG_ENDIAN ? 0 : $xl_xid, + $BIG_ENDIAN ? $xl_xid : 0, $BIG_ENDIAN ? 0 : $xl_prev, $BIG_ENDIAN ? 
$xl_prev : 0, - $xl_info, $xl_rmid, 0, 0, $xl_crc); + $xl_info, $xl_rmid, + 0, 0, 0, 0, 0, 0); } # Build a fake WAL page header, based on the data given by the caller @@ -147,7 +151,7 @@ $node->stop('immediate'); my $log_size = -s $node->logfile; $node->start; ok( $node->log_contains( - "invalid record length at .*: expected at least 24, got 0", $log_size + "invalid record length at .*: expected at least 26, got 0", $log_size ), "xl_tot_len zero"); @@ -159,7 +163,7 @@ $node->write_wal($TLI, $end_lsn, $WAL_SEGMENT_SIZE, build_record_header(23)); $log_size = -s $node->logfile; $node->start; ok( $node->log_contains( - "invalid record length at .*: expected at least 24, got 23", + "invalid record length at .*: expected at least 26, got 23", $log_size), "xl_tot_len short"); @@ -172,7 +176,7 @@ $node->write_wal($TLI, $end_lsn, $WAL_SEGMENT_SIZE, build_record_header(1)); $log_size = -s $node->logfile; $node->start; ok( $node->log_contains( - "invalid record length at .*: expected at least 24, got 1", $log_size + "invalid record length at .*: expected at least 26, got 1", $log_size ), "xl_tot_len short at end-of-page"); diff --git a/src/test/regress/expected/indirect_toast.out b/src/test/regress/expected/indirect_toast.out index 44b54dc37fd..313482b866c 100644 --- a/src/test/regress/expected/indirect_toast.out +++ b/src/test/regress/expected/indirect_toast.out @@ -161,6 +161,14 @@ SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest; ("one-toasted,one-null, via indirect",0,1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890 (5 rows) +create or replace function random_string(len integer) returns text as $$ +select substr((select string_agg(r,'') from (select random()::text as r from generate_series(1,(len+15)/16)) s1), 1, len); +$$ language sql; +create table toasttest_main(t text); +alter table toasttest_main alter column t set storage main; +insert into 
toasttest_main (select random_string(len) from generate_series(7000,8000) len); DROP TABLE indtoasttest; +DROP TABLE toasttest_main; DROP FUNCTION update_using_indirect(); +DROP FUNCTION random_string(integer); RESET default_toast_compression; diff --git a/src/test/regress/expected/insert.out b/src/test/regress/expected/insert.out index cf4b5221a8d..0d40f3e3026 100644 --- a/src/test/regress/expected/insert.out +++ b/src/test/regress/expected/insert.out @@ -100,7 +100,7 @@ SELECT pg_size_pretty(pg_relation_size('large_tuple_test'::regclass, 'main')); INSERT INTO large_tuple_test (select 3, NULL); -- now this tuple won't fit on the second page, but the insert should -- still succeed by extending the relation -INSERT INTO large_tuple_test (select 4, repeat('a', 8126)); +INSERT INTO large_tuple_test (select 4, repeat('a', 8112)); DROP TABLE large_tuple_test; -- -- check indirection (field/array assignment), cf bug #14265 @@ -1094,3 +1094,17 @@ insert into returningwrtest values (2, 'foo') returning returningwrtest; (1 row) drop table returningwrtest; +-- Check for MaxHeapTupleSize +create table maxheaptuplesize_test(value text); +alter table maxheaptuplesize_test alter column value set storage external; +insert into maxheaptuplesize_test values (repeat('x', 8104)); +insert into maxheaptuplesize_test values (repeat('x', 8112)); +insert into maxheaptuplesize_test values (repeat('x', 8120)); +insert into maxheaptuplesize_test values (repeat('x', 8128)); +insert into maxheaptuplesize_test values (repeat('x', 8136)); +insert into maxheaptuplesize_test values (repeat('x', 8144)); +insert into maxheaptuplesize_test values (repeat('x', 8152)); +insert into maxheaptuplesize_test values (repeat('x', 8160)); +insert into maxheaptuplesize_test values (repeat('x', 8168)); +insert into maxheaptuplesize_test values (repeat('x', 8176)); +drop table maxheaptuplesize_test; diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 
20bf9ea9cdf..f992753eedf 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -197,7 +197,7 @@ WHERE p1.oid != p2.oid AND ORDER BY 1, 2; proargtypes | proargtypes -----------------------------+-------------------------- - integer | xid + bigint | xid timestamp without time zone | timestamp with time zone bit | bit varying txid_snapshot | pg_snapshot @@ -736,7 +736,7 @@ int8(oid) tideq(tid,tid) timestamptz_cmp(timestamp with time zone,timestamp with time zone) interval_cmp(interval,interval) -xideqint4(xid,integer) +xideqint8(xid,bigint) timetz_eq(time with time zone,time with time zone) timetz_ne(time with time zone,time with time zone) timetz_lt(time with time zone,time with time zone) @@ -850,7 +850,7 @@ pg_lsn_gt(pg_lsn,pg_lsn) pg_lsn_ne(pg_lsn,pg_lsn) pg_lsn_cmp(pg_lsn,pg_lsn) xidneq(xid,xid) -xidneqint4(xid,integer) +xidneqint8(xid,bigint) sha224(bytea) sha256(bytea) sha384(bytea) diff --git a/src/test/regress/expected/select_views.out b/src/test/regress/expected/select_views.out index 1aeed8452bd..804d9914e8d 100644 --- a/src/test/regress/expected/select_views.out +++ b/src/test/regress/expected/select_views.out @@ -2,9 +2,22 @@ -- SELECT_VIEWS -- test the views defined in CREATE_VIEWS -- -SELECT * FROM street; +SELECT * FROM street ORDER BY name COLLATE "C", thepath::text COLLATE "C", cname COLLATE "C"; name | thepath | cname ------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------- + 100th Ave | [(-122.1657,37.429),(-122.1647,37.432)] | Oakland + 107th Ave | 
[(-122.1555,37.403),(-122.1531,37.41)] | Oakland + 14th St | [(-122.299,37.147),(-122.3,37.148)] | Lafayette + 19th Ave | [(-122.2366,37.897),(-122.2359,37.905)] | Berkeley + 1st St | [(-121.75508,37.89294),(-121.753581,37.90031)] | Oakland + 5th St | [(-122.278,37),(-122.2792,37.005),(-122.2803,37.009)] | Lafayette + 5th St | [(-122.296,37.615),(-122.2953,37.598)] | Berkeley + 82nd Ave | [(-122.1695,37.596),(-122.1681,37.603)] | Berkeley + 85th Ave | [(-122.1877,37.466),(-122.186,37.476)] | Oakland + 89th Ave | [(-122.1822,37.459),(-122.1803,37.471)] | Oakland + 98th Ave | [(-122.1568,37.498),(-122.1558,37.502)] | Oakland + 98th Ave | [(-122.1693,37.438),(-122.1682,37.444)] | Oakland + 98th Ave | [(-122.2001,37.258),(-122.1974,37.27)] | Lafayette Access Rd 25 | [(-121.9283,37.894),(-121.9283,37.9)] | Oakland Ada St | [(-122.2487,37.398),(-122.2496,37.401)] | Lafayette Agua Fria Creek | [(-121.9254,37.922),(-121.9281,37.889)] | Oakland @@ -22,10 +35,10 @@ SELECT * FROM street; Arroyo Las Positas | [(-121.7973,37.997),(-121.7957,37.005)] | Oakland Arroyo Seco | [(-121.7073,37.766),(-121.6997,37.729)] | Oakland Ash St | [(-122.0408,37.31),(-122.04,37.292)] | Oakland - Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Oakland Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Berkeley - Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] | Oakland + Avenue 134th | [(-122.1823,37.002),(-122.1851,37.992)] | Oakland Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] | Berkeley + Avenue 140th | [(-122.1656,37.003),(-122.1691,37.988)] | Oakland Avenue D | [(-122.298,37.848),(-122.3024,37.849)] | Berkeley B St | [(-122.1749,37.451),(-122.1743,37.443)] | Oakland Bancroft Ave | [(-122.15714,37.4242),(-122.156,37.409)] | Oakland @@ -37,9 +50,9 @@ SELECT * FROM street; Broadmore Ave | [(-122.095,37.522),(-122.0936,37.497)] | Oakland Broadway | [(-122.2409,37.586),(-122.2395,37.601)] | Berkeley Buckingham Blvd | [(-122.2231,37.59),(-122.2214,37.606)] | 
Berkeley + Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Berkeley Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Oakland Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Oakland - Butterfield Dr | [(-122.0838,37.002),(-122.0834,37.987)] | Berkeley C St | [(-122.1768,37.46),(-122.1749,37.435)] | Oakland Calaveras Creek | [(-121.8203,37.035),(-121.8207,37.931)] | Oakland Calaveras Creek | [(-121.8203,37.035),(-121.8207,37.931)] | Oakland @@ -60,9 +73,9 @@ SELECT * FROM street; Chapman Dr | [(-122.0421,37.504),(-122.0414,37.498)] | Oakland Charles St | [(-122.0255,37.505),(-122.0252,37.499)] | Oakland Cherry St | [(-122.0437,37.42),(-122.0434,37.413)] | Oakland + Claremont Pl | [(-122.0542,37.995),(-122.0542,37.008)] | Berkeley Claremont Pl | [(-122.0542,37.995),(-122.0542,37.008)] | Oakland Claremont Pl | [(-122.0542,37.995),(-122.0542,37.008)] | Oakland - Claremont Pl | [(-122.0542,37.995),(-122.0542,37.008)] | Berkeley Coliseum Way | [(-122.2001,37.47),(-122.1978,37.516)] | Oakland Coliseum Way | [(-122.2113,37.626),(-122.2085,37.592),(-122.2063,37.568)] | Berkeley Coolidge Ave | [(-122.2007,37.058),(-122.1992,37.06)] | Lafayette @@ -77,9 +90,9 @@ SELECT * FROM street; Cull Canyon Road | [(-122.0536,37.435),(-122.0499,37.315)] | Oakland Cull Creek | [(-122.0624,37.875),(-122.0582,37.527)] | Berkeley D St | [(-122.1811,37.505),(-122.1805,37.497)] | Oakland + Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Berkeley Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Oakland Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Oakland - Decoto Road | [(-122.0159,37.006),(-122.016,37.002),(-122.0164,37.993)] | Berkeley Deering St | [(-122.2146,37.904),(-122.2126,37.897)] | Berkeley Dimond Ave | [(-122.2167,37.994),(-122.2162,37.006)] | Berkeley Dimond Ave | [(-122.2167,37.994),(-122.2162,37.006)] | Lafayette @@ -89,9 +102,9 @@ SELECT * FROM street; Driscoll Road | 
[(-121.9482,37.403),(-121.948451,37.39995)] | Oakland E St | [(-122.1832,37.505),(-122.1826,37.498),(-122.182,37.49)] | Oakland Eden Ave | [(-122.1143,37.505),(-122.1142,37.491)] | Oakland + Eden Creek | [(-122.022037,37.00675),(-122.0221,37.998)] | Berkeley Eden Creek | [(-122.022037,37.00675),(-122.0221,37.998)] | Oakland Eden Creek | [(-122.022037,37.00675),(-122.0221,37.998)] | Oakland - Eden Creek | [(-122.022037,37.00675),(-122.0221,37.998)] | Berkeley Edgewater Dr | [(-122.201,37.379),(-122.2042,37.41)] | Lafayette Enos Way | [(-121.7677,37.896),(-121.7673,37.91)] | Oakland Euclid Ave | [(-122.2671,37.009),(-122.2666,37.987)] | Berkeley @@ -106,8 +119,8 @@ SELECT * FROM street; Harris Road | [(-122.0659,37.372),(-122.0675,37.363)] | Oakland Heartwood Dr | [(-122.2006,37.341),(-122.1992,37.338)] | Lafayette Hegenberger Exwy | [(-122.1946,37.52),(-122.1947,37.497)] | Oakland - Herrier St | [(-122.1943,37.006),(-122.1936,37.998)] | Oakland Herrier St | [(-122.1943,37.006),(-122.1936,37.998)] | Berkeley + Herrier St | [(-122.1943,37.006),(-122.1936,37.998)] | Oakland Hesperian Blvd | [(-122.097,37.333),(-122.0956,37.31),(-122.0946,37.293)] | Oakland Hesperian Blvd | [(-122.097,37.333),(-122.0956,37.31),(-122.0946,37.293)] | Oakland Hesperian Blvd | [(-122.1132,37.6),(-122.1123,37.586)] | Berkeley @@ -117,10 +130,10 @@ SELECT * FROM street; I- 580 | [(-121.9322,37.989),(-121.9243,37.006),(-121.9217,37.014)] | Oakland I- 580 | [(-122.018,37.019),(-122.0009,37.032),(-121.9787,37.983),(-121.958,37.984),(-121.9571,37.986)] | Oakland I- 580 | [(-122.018,37.019),(-122.0009,37.032),(-121.9787,37.983),(-121.958,37.984),(-121.9571,37.986)] | Oakland - I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Oakland I- 580 | 
[(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Berkeley - I- 580 | [(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Oakland + I- 580 | [(-122.1108,37.023),(-122.1101,37.02),(-122.108103,37.00764),(-122.108,37.007),(-122.1069,37.998),(-122.1064,37.994),(-122.1053,37.982),(-122.1048,37.977),(-122.1032,37.958),(-122.1026,37.953),(-122.1013,37.938),(-122.0989,37.911),(-122.0984,37.91),(-122.098,37.908)] | Oakland I- 580 | [(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Berkeley + I- 580 | [(-122.1543,37.703),(-122.1535,37.694),(-122.1512,37.655),(-122.1475,37.603),(-122.1468,37.583),(-122.1472,37.569),(-122.149044,37.54874),(-122.1493,37.546),(-122.1501,37.532),(-122.1506,37.509),(-122.1495,37.482),(-122.1487,37.467),(-122.1477,37.447),(-122.1414,37.383),(-122.1404,37.376),(-122.1398,37.372),(-122.139,37.356),(-122.1388,37.353),(-122.1385,37.34),(-122.1382,37.33),(-122.1378,37.316)] | Oakland I- 580 | 
[(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Berkeley I- 580 | [(-122.2197,37.99),(-122.22,37.99),(-122.222092,37.99523),(-122.2232,37.998),(-122.224146,37.99963),(-122.2261,37.003),(-122.2278,37.007),(-122.2302,37.026),(-122.2323,37.043),(-122.2344,37.059),(-122.235405,37.06427),(-122.2365,37.07)] | Lafayette I- 580 Ramp | [(-121.8521,37.011),(-121.8479,37.999),(-121.8476,37.999),(-121.8456,37.01),(-121.8455,37.011)] | Oakland @@ -136,8 +149,8 @@ SELECT * FROM street; I- 580 Ramp | [(-122.0941,37.897),(-122.0943,37.902)] | Berkeley I- 580 Ramp | [(-122.096,37.888),(-122.0962,37.891),(-122.0964,37.9)] | Berkeley I- 580 Ramp | [(-122.101,37.898),(-122.1005,37.902),(-122.0989,37.911)] | Berkeley - I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Oakland I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Berkeley + I- 580 Ramp | [(-122.1086,37.003),(-122.1068,37.993),(-122.1066,37.992),(-122.1053,37.982)] | Oakland I- 580 Ramp | [(-122.1414,37.383),(-122.1407,37.376),(-122.1403,37.372),(-122.139,37.356)] | Oakland I- 580/I-680 Ramp | ((-121.9207,37.988),(-121.9192,37.016)) | Oakland I- 580/I-680 Ramp | ((-121.9207,37.988),(-121.9192,37.016)) | Oakland @@ -158,16 +171,16 @@ SELECT * FROM street; I- 880 | ((-121.9669,37.075),(-121.9663,37.071),(-121.9656,37.065),(-121.9618,37.037),(-121.95689,37),(-121.948,37.933)) | Oakland I- 880 | [(-121.948,37.933),(-121.9471,37.925),(-121.9467,37.923),(-121.946,37.918),(-121.9452,37.912),(-121.937,37.852)] | Oakland I- 880 | [(-122.0219,37.466),(-122.0205,37.447),(-122.020331,37.44447),(-122.020008,37.43962),(-122.0195,37.432),(-122.0193,37.429),(-122.0164,37.393),(-122.010219,37.34771),(-122.0041,37.313)] | Oakland - I- 880 | 
[(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Oakland I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Berkeley + I- 880 | [(-122.0375,37.632),(-122.0359,37.619),(-122.0358,37.616),(-122.034514,37.60409),(-122.031876,37.57965),(-122.031193,37.57332),(-122.03016,37.56375),(-122.02943,37.55698),(-122.028689,37.54929),(-122.027833,37.53908),(-122.025979,37.51698),(-122.0238,37.491)] | Oakland + I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Berkeley I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Oakland - I- 880 | [(-122.0612,37.003),(-122.0604,37.991),(-122.0596,37.982),(-122.0585,37.967),(-122.0583,37.961),(-122.0553,37.918),(-122.053635,37.89475),(-122.050759,37.8546),(-122.05,37.844),(-122.0485,37.817),(-122.0483,37.813),(-122.0482,37.811)] | Berkeley I- 880 | 
[(-122.0831,37.312),(-122.0819,37.296),(-122.081,37.285),(-122.0786,37.248),(-122.078,37.24),(-122.077642,37.23496),(-122.076983,37.22567),(-122.076599,37.22026),(-122.076229,37.21505),(-122.0758,37.209)] | Oakland I- 880 | [(-122.0978,37.528),(-122.096,37.496),(-122.0931,37.453),(-122.09277,37.4496),(-122.090189,37.41442),(-122.0896,37.405),(-122.085,37.34)] | Oakland I- 880 | [(-122.1365,37.902),(-122.1358,37.898),(-122.1333,37.881),(-122.1323,37.874),(-122.1311,37.866),(-122.1308,37.865),(-122.1307,37.864),(-122.1289,37.851),(-122.1277,37.843),(-122.1264,37.834),(-122.1231,37.812),(-122.1165,37.766),(-122.1104,37.72),(-122.109695,37.71094),(-122.109,37.702),(-122.108312,37.69168),(-122.1076,37.681)] | Berkeley - I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Oakland I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Berkeley + I- 880 | [(-122.1755,37.185),(-122.1747,37.178),(-122.1742,37.173),(-122.1692,37.126),(-122.167792,37.11594),(-122.16757,37.11435),(-122.1671,37.111),(-122.1655,37.1),(-122.165169,37.09811),(-122.1641,37.092),(-122.1596,37.061),(-122.158381,37.05275),(-122.155991,37.03657),(-122.1531,37.017),(-122.1478,37.98),(-122.1407,37.932),(-122.1394,37.924),(-122.1389,37.92),(-122.1376,37.91)] | Oakland I- 880 | 
[(-122.2214,37.711),(-122.2202,37.699),(-122.2199,37.695),(-122.219,37.682),(-122.2184,37.672),(-122.2173,37.652),(-122.2159,37.638),(-122.2144,37.616),(-122.2138,37.612),(-122.2135,37.609),(-122.212,37.592),(-122.2116,37.586),(-122.2111,37.581)] | Berkeley I- 880 | [(-122.2707,37.975),(-122.2693,37.972),(-122.2681,37.966),(-122.267,37.962),(-122.2659,37.957),(-122.2648,37.952),(-122.2636,37.946),(-122.2625,37.935),(-122.2617,37.927),(-122.2607,37.921),(-122.2593,37.916),(-122.258,37.911),(-122.2536,37.898),(-122.2432,37.858),(-122.2408,37.845),(-122.2386,37.827),(-122.2374,37.811)] | Berkeley I- 880 Ramp | [(-122.0019,37.301),(-122.002,37.293)] | Oakland @@ -175,12 +188,12 @@ SELECT * FROM street; I- 880 Ramp | [(-122.0041,37.313),(-122.0038,37.308),(-122.0039,37.284),(-122.0013,37.287),(-121.9995,37.289)] | Oakland I- 880 Ramp | [(-122.0236,37.488),(-122.0231,37.458),(-122.0227,37.458),(-122.0223,37.452),(-122.0205,37.447)] | Oakland I- 880 Ramp | [(-122.0238,37.491),(-122.0215,37.483),(-122.0211,37.477),(-122.0205,37.447)] | Oakland + I- 880 Ramp | [(-122.059,37.982),(-122.0577,37.984),(-122.0612,37.003)] | Berkeley I- 880 Ramp | [(-122.059,37.982),(-122.0577,37.984),(-122.0612,37.003)] | Oakland I- 880 Ramp | [(-122.059,37.982),(-122.0577,37.984),(-122.0612,37.003)] | Oakland - I- 880 Ramp | [(-122.059,37.982),(-122.0577,37.984),(-122.0612,37.003)] | Berkeley + I- 880 Ramp | [(-122.0618,37.011),(-122.0631,37.982),(-122.0585,37.967)] | Berkeley I- 880 Ramp | [(-122.0618,37.011),(-122.0631,37.982),(-122.0585,37.967)] | Oakland I- 880 Ramp | [(-122.0618,37.011),(-122.0631,37.982),(-122.0585,37.967)] | Oakland - I- 880 Ramp | [(-122.0618,37.011),(-122.0631,37.982),(-122.0585,37.967)] | Berkeley I- 880 Ramp | [(-122.085,37.34),(-122.0801,37.316),(-122.081,37.285)] | Oakland I- 880 Ramp | [(-122.085,37.34),(-122.0801,37.316),(-122.081,37.285)] | Oakland I- 880 Ramp | [(-122.085,37.34),(-122.0866,37.316),(-122.0819,37.296)] | Oakland @@ -212,26 +225,26 @@ SELECT * 
FROM street; Livermore Ave | [(-121.7687,37.448),(-121.769,37.375)] | Oakland Livermore Ave | [(-121.772719,37.99085),(-121.7728,37.001)] | Oakland Livermore Ave | [(-121.772719,37.99085),(-121.7728,37.001)] | Oakland - Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Oakland Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Berkeley + Locust St | [(-122.1606,37.007),(-122.1593,37.987)] | Oakland Logan Ct | [(-122.0053,37.492),(-122.0061,37.484)] | Oakland Magnolia St | [(-122.0971,37.5),(-122.0962,37.484)] | Oakland Mandalay Road | [(-122.2322,37.397),(-122.2321,37.403)] | Lafayette Marin Ave | [(-122.2741,37.894),(-122.272,37.901)] | Berkeley Martin Luther King Jr Way | [(-122.2712,37.608),(-122.2711,37.599)] | Berkeley Mattos Dr | [(-122.0005,37.502),(-122.000898,37.49683)] | Oakland - Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Oakland Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Berkeley - McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Oakland + Maubert Ave | [(-122.1114,37.009),(-122.1096,37.995)] | Oakland McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Berkeley + McClure Ave | [(-122.1431,37.001),(-122.1436,37.998)] | Oakland Medlar Dr | [(-122.0627,37.378),(-122.0625,37.375)] | Oakland Mildred Ct | [(-122.0002,37.388),(-121.9998,37.386)] | Oakland Miller Road | [(-122.0902,37.645),(-122.0865,37.545)] | Berkeley Miramar Ave | [(-122.1009,37.025),(-122.099089,37.03209)] | Oakland Mission Blvd | [(-121.918886,37),(-121.9194,37.976),(-121.9198,37.975)] | Oakland Mission Blvd | [(-121.918886,37),(-121.9194,37.976),(-121.9198,37.975)] | Oakland - Mission Blvd | [(-122.0006,37.896),(-121.9989,37.88)] | Oakland Mission Blvd | [(-122.0006,37.896),(-121.9989,37.88)] | Berkeley + Mission Blvd | [(-122.0006,37.896),(-121.9989,37.88)] | Oakland Moores Ave | [(-122.0087,37.301),(-122.0094,37.292)] | Oakland National Ave | [(-122.1192,37.5),(-122.1281,37.489)] | Oakland Navajo Ct | [(-121.8779,37.901),(-121.8783,37.9)] | 
Oakland @@ -242,49 +255,49 @@ SELECT * FROM street; Parkridge Dr | [(-122.1438,37.884),(-122.1428,37.9)] | Berkeley Parkside Dr | [(-122.0475,37.603),(-122.0443,37.596)] | Berkeley Paseo Padre Pkwy | [(-121.9143,37.005),(-121.913522,37)] | Oakland - Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Oakland Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Berkeley + Paseo Padre Pkwy | [(-122.0021,37.639),(-121.996,37.628)] | Oakland Pearl St | [(-122.2383,37.594),(-122.2366,37.615)] | Berkeley Periwinkle Road | [(-122.0451,37.301),(-122.044758,37.29844)] | Oakland Pimlico Dr | [(-121.8616,37.998),(-121.8618,37.008)] | Oakland Pimlico Dr | [(-121.8616,37.998),(-121.8618,37.008)] | Oakland Portsmouth Ave | [(-122.1064,37.315),(-122.1064,37.308)] | Oakland Proctor Ave | [(-122.2267,37.406),(-122.2251,37.386)] | Lafayette + Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Berkeley Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Oakland Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Oakland - Railroad Ave | [(-122.0245,37.013),(-122.0234,37.003),(-122.0223,37.993)] | Berkeley + Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Berkeley Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Oakland - Ranspot Dr | [(-122.0972,37.999),(-122.0959,37)] | Berkeley Redding St | [(-122.1978,37.901),(-122.1975,37.895)] | Berkeley - Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Oakland Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Berkeley + Redwood Road | [(-122.1493,37.98),(-122.1437,37.001)] | Oakland Roca Dr | [(-122.0335,37.609),(-122.0314,37.599)] | Berkeley Rosedale Ct | [(-121.9232,37.9),(-121.924,37.897)] | Oakland Sacramento St | [(-122.2799,37.606),(-122.2797,37.597)] | Berkeley Saddle Brook Dr | [(-122.1478,37.909),(-122.1454,37.904),(-122.1451,37.888)] | Berkeley Saginaw Ct | 
[(-121.8803,37.898),(-121.8806,37.901)] | Oakland San Andreas Dr | [(-122.0609,37.9),(-122.0614,37.895)] | Berkeley + Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Berkeley Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Oakland - Santa Maria Ave | [(-122.0773,37),(-122.0773,37.98)] | Berkeley Shattuck Ave | [(-122.2686,37.904),(-122.2686,37.897)] | Berkeley Sheridan Road | [(-122.2279,37.425),(-122.2253,37.411),(-122.2223,37.377)] | Lafayette Shoreline Dr | [(-122.2657,37.603),(-122.2648,37.6)] | Berkeley - Skyline Blvd | [(-122.1738,37.01),(-122.1714,37.996)] | Oakland Skyline Blvd | [(-122.1738,37.01),(-122.1714,37.996)] | Berkeley + Skyline Blvd | [(-122.1738,37.01),(-122.1714,37.996)] | Oakland Skyline Dr | [(-122.0277,37.5),(-122.0284,37.498)] | Oakland Skywest Dr | [(-122.1161,37.62),(-122.1123,37.586)] | Berkeley Southern Pacific Railroad | [(-122.3002,37.674),(-122.2999,37.661)] | Berkeley Sp Railroad | [(-121.893564,37.99009),(-121.897,37.016)] | Oakland Sp Railroad | [(-121.893564,37.99009),(-121.897,37.016)] | Oakland Sp Railroad | [(-121.9565,37.898),(-121.9562,37.9)] | Oakland + Sp Railroad | [(-122.0734,37.001),(-122.0734,37.997)] | Berkeley Sp Railroad | [(-122.0734,37.001),(-122.0734,37.997)] | Oakland Sp Railroad | [(-122.0734,37.001),(-122.0734,37.997)] | Oakland - Sp Railroad | [(-122.0734,37.001),(-122.0734,37.997)] | Berkeley Sp Railroad | [(-122.0914,37.601),(-122.087,37.56),(-122.086408,37.5551)] | Berkeley - Sp Railroad | [(-122.137792,37.003),(-122.1365,37.992),(-122.131257,37.94612)] | Oakland Sp Railroad | [(-122.137792,37.003),(-122.1365,37.992),(-122.131257,37.94612)] | Berkeley + Sp Railroad | [(-122.137792,37.003),(-122.1365,37.992),(-122.131257,37.94612)] | Oakland Sp Railroad | [(-122.1947,37.497),(-122.193328,37.4848)] | Oakland Stanton Ave | [(-122.100392,37.0697),(-122.099513,37.06052)] | Oakland State Hwy 123 | 
[(-122.3004,37.986),(-122.2998,37.969),(-122.2995,37.962),(-122.2992,37.952),(-122.299,37.942),(-122.2987,37.935),(-122.2984,37.924),(-122.2982,37.92),(-122.2976,37.904),(-122.297,37.88),(-122.2966,37.869),(-122.2959,37.848),(-122.2961,37.843)] | Berkeley @@ -316,28 +329,15 @@ SELECT * FROM street; Welch Creek Road | [(-121.7695,37.386),(-121.7737,37.413)] | Oakland Welch Creek Road | [(-121.7695,37.386),(-121.7737,37.413)] | Oakland West Loop Road | [(-122.0576,37.604),(-122.0602,37.586)] | Berkeley + Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Berkeley Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Oakland - Western Pacific Railroad Spur | [(-122.0394,37.018),(-122.0394,37.961)] | Berkeley Whitlock Creek | [(-121.74683,37.91276),(-121.733107,37)] | Oakland Whitlock Creek | [(-121.74683,37.91276),(-121.733107,37)] | Oakland Willimet Way | [(-122.0964,37.517),(-122.0949,37.493)] | Oakland - Wisconsin St | [(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Oakland Wisconsin St | [(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Berkeley + Wisconsin St | [(-122.1994,37.017),(-122.1975,37.998),(-122.1971,37.994)] | Oakland Wp Railroad | [(-122.254,37.902),(-122.2506,37.891)] | Berkeley - 100th Ave | [(-122.1657,37.429),(-122.1647,37.432)] | Oakland - 107th Ave | [(-122.1555,37.403),(-122.1531,37.41)] | Oakland - 14th St | [(-122.299,37.147),(-122.3,37.148)] | Lafayette - 19th Ave | [(-122.2366,37.897),(-122.2359,37.905)] | Berkeley - 1st St | [(-121.75508,37.89294),(-121.753581,37.90031)] | Oakland - 5th St | [(-122.278,37),(-122.2792,37.005),(-122.2803,37.009)] | Lafayette - 5th St | [(-122.296,37.615),(-122.2953,37.598)] | Berkeley - 82nd Ave | [(-122.1695,37.596),(-122.1681,37.603)] | Berkeley - 85th Ave | [(-122.1877,37.466),(-122.186,37.476)] | Oakland - 89th Ave | [(-122.1822,37.459),(-122.1803,37.471)] | 
Oakland - 98th Ave | [(-122.1568,37.498),(-122.1558,37.502)] | Oakland - 98th Ave | [(-122.1693,37.438),(-122.1682,37.444)] | Oakland - 98th Ave | [(-122.2001,37.258),(-122.1974,37.27)] | Lafayette (333 rows) SELECT name, #thepath FROM iexit ORDER BY name COLLATE "C", 2; diff --git a/src/test/regress/expected/txid.out b/src/test/regress/expected/txid.out index 95ba66e95ee..2ea4434f513 100644 --- a/src/test/regress/expected/txid.out +++ b/src/test/regress/expected/txid.out @@ -238,9 +238,11 @@ SELECT txid_snapshot '1:9223372036854775807:3'; (1 row) SELECT txid_snapshot '1:9223372036854775808:3'; -ERROR: invalid input syntax for type pg_snapshot: "1:9223372036854775808:3" -LINE 1: SELECT txid_snapshot '1:9223372036854775808:3'; - ^ + txid_snapshot +------------------------- + 1:9223372036854775808:3 +(1 row) + -- test txid_current_if_assigned BEGIN; SELECT txid_current_if_assigned() IS NULL; diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index 8eff3d10d27..5f327aa997a 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -19,7 +19,7 @@ WHERE t1.typnamespace = 0 OR (t1.typlen <= 0 AND t1.typlen != -1 AND t1.typlen != -2) OR (t1.typtype not in ('b', 'c', 'd', 'e', 'm', 'p', 'r')) OR NOT t1.typisdefined OR - (t1.typalign not in ('c', 's', 'i', 'd')) OR + (t1.typalign not in ('c', 's', 'i', 'd', 'x')) OR (t1.typstorage not in ('p', 'x', 'e', 'm')); oid | typname -----+--------- @@ -32,7 +32,8 @@ WHERE t1.typbyval AND (t1.typlen != 1 OR t1.typalign != 'c') AND (t1.typlen != 2 OR t1.typalign != 's') AND (t1.typlen != 4 OR t1.typalign != 'i') AND - (t1.typlen != 8 OR t1.typalign != 'd'); + (t1.typlen != 8 OR t1.typalign != 'd') AND + (t1.typlen != 8 OR t1.typalign != 'x'); oid | typname -----+--------- (0 rows) diff --git a/src/test/regress/expected/xid.out b/src/test/regress/expected/xid.out index 835077e9d57..0154990d1af 100644 --- a/src/test/regress/expected/xid.out 
+++ b/src/test/regress/expected/xid.out @@ -8,9 +8,9 @@ select '010'::xid, '42'::xid8, '0xffffffffffffffff'::xid8, '-1'::xid8; - xid | xid | xid | xid | xid8 | xid8 | xid8 | xid8 ------+-----+------------+------------+------+------+----------------------+---------------------- - 8 | 42 | 4294967295 | 4294967295 | 8 | 42 | 18446744073709551615 | 18446744073709551615 + xid | xid | xid | xid | xid8 | xid8 | xid8 | xid8 +-----+-----+------------+----------------------+------+------+----------------------+---------------------- + 8 | 42 | 4294967295 | 18446744073709551615 | 8 | 42 | 18446744073709551615 | 18446744073709551615 (1 row) -- garbage values @@ -43,10 +43,10 @@ SELECT pg_input_is_valid('asdf', 'xid'); f (1 row) -SELECT * FROM pg_input_error_info('0xffffffffff', 'xid'); - message | detail | hint | sql_error_code ----------------------------------------------------+--------+------+---------------- - value "0xffffffffff" is out of range for type xid | | | 22003 +SELECT * FROM pg_input_error_info('0xffffffffffffffffffff', 'xid'); + message | detail | hint | sql_error_code +-------------------------------------------------------------+--------+------+---------------- + value "0xffffffffffffffffffff" is out of range for type xid | | | 22003 (1 row) SELECT pg_input_is_valid('42', 'xid8'); @@ -441,9 +441,11 @@ SELECT pg_snapshot '1:9223372036854775807:3'; (1 row) SELECT pg_snapshot '1:9223372036854775808:3'; -ERROR: invalid input syntax for type pg_snapshot: "1:9223372036854775808:3" -LINE 1: SELECT pg_snapshot '1:9223372036854775808:3'; - ^ + pg_snapshot +------------------------- + 1:9223372036854775808:3 +(1 row) + -- test pg_current_xact_id_if_assigned BEGIN; SELECT pg_current_xact_id_if_assigned() IS NULL; diff --git a/src/test/regress/expected/xid64.out b/src/test/regress/expected/xid64.out new file mode 100644 index 00000000000..c30c5b57399 --- /dev/null +++ b/src/test/regress/expected/xid64.out @@ -0,0 +1,92 @@ +--- +--- Unit test for xid64 functions +--- +-- 
directory paths and dlsuffix are passed to us in environment variables +\getenv libdir PG_LIBDIR +\getenv dlsuffix PG_DLSUFFIX +\set regresslib :libdir '/regress' :dlsuffix +CREATE FUNCTION xid64_test_1(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_1' LANGUAGE C STRICT; +CREATE FUNCTION xid64_test_2(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_2' LANGUAGE C STRICT; +CREATE FUNCTION xid64_test_double_xmax(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_double_xmax' LANGUAGE C STRICT; +--- +--- Check page consistency after conversion +--- +CREATE UNLOGGED TABLE test_xid64_table(a int); +ALTER TABLE test_xid64_table SET (autovacuum_enabled = false); +INSERT INTO test_xid64_table(a) SELECT a FROM generate_series(1, 1000) AS a; +SELECT xid64_test_1('test_xid64_table'); +INFO: test 1: page is converted to xid64 format + xid64_test_1 +-------------- + +(1 row) + +DROP TABLE test_xid64_table; +--- +--- Check tuples consistency after conversion +--- +CREATE UNLOGGED TABLE test_xid64_table(s serial, i int, t text); +ALTER TABLE test_xid64_table SET (autovacuum_enabled = false); +DO $$ +BEGIN + FOR j IN 1..20 LOOP + INSERT INTO test_xid64_table(i, t) VALUES (random()::int, md5(random()::text)); + COMMIT; + END LOOP; +END $$; +DO $$ +BEGIN + FOR j IN 1..10 LOOP + DELETE FROM test_xid64_table WHERE ctid IN (SELECT ctid FROM test_xid64_table TABLESAMPLE BERNOULLI (5)); + COMMIT; + END LOOP; +END $$; +SELECT xid64_test_2('test_xid64_table'); + xid64_test_2 +-------------- + +(1 row) + +DROP TABLE test_xid64_table; +--- +--- Check tuples consistency after conversion to double xmax (on full page) +--- +CREATE UNLOGGED TABLE test_xid64_table(i int); +DO $$ +BEGIN + FOR j IN 1..40 LOOP + INSERT INTO test_xid64_table SELECT i FROM generate_series(1, 100) AS i; + COMMIT; + END LOOP; +END $$; +SELECT xid64_test_2('test_xid64_table'); + xid64_test_2 +-------------- + +(1 row) + +DROP TABLE test_xid64_table; +CREATE UNLOGGED TABLE test_xid64_table(i 
text); +INSERT INTO test_xid64_table(i) VALUES ('NNBABCDSDFGHJKLP'); +DO $$ +BEGIN + FOR j IN 1..40 LOOP + INSERT INTO test_xid64_table(i) SELECT 'A' FROM generate_series(1, 100) AS i; + COMMIT; + END LOOP; +END $$; +SELECT xid64_test_double_xmax('test_xid64_table'); +INFO: test double xmax: page 0 is converted into double xmax format +INFO: test double xmax: end + xid64_test_double_xmax +------------------------ + +(1 row) + +DROP TABLE test_xid64_table; +DROP FUNCTION xid64_test_1(rel regclass); +DROP FUNCTION xid64_test_2(rel regclass); +DROP FUNCTION xid64_test_double_xmax(rel regclass); diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index 0a35f2f8f6a..9bb8ad05504 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -28,7 +28,7 @@ test: strings md5 numerology point lseg line box path polygon circle date time t # geometry depends on point, lseg, line, box, path, polygon, circle # horology depends on date, time, timetz, timestamp, timestamptz, interval # ---------- -test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc database stats_import +test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid xid64 mvcc database stats_import # ---------- # Load huge amounts of data diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index ed4a7937331..d2240c60dae 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -21,6 +21,9 @@ #include "access/detoast.h" #include "access/htup_details.h" +#include "access/transam.h" +#include "access/xact.h" +#include "catalog/catalog.h" #include "catalog/namespace.h" #include "catalog/pg_operator.h" #include "catalog/pg_type.h" @@ -39,6 +42,7 @@ #include "postmaster/postmaster.h" /* for MAX_BACKENDS */ #include "storage/spin.h" #include "utils/array.h" +#include "storage/checksum.h" #include "utils/builtins.h" #include 
"utils/geo_decls.h" #include "utils/memutils.h" @@ -575,6 +579,7 @@ make_tuple_indirect(PG_FUNCTION_ARGS) tuple.t_len = HeapTupleHeaderGetDatumLength(rec); ItemPointerSetInvalid(&(tuple.t_self)); tuple.t_tableOid = InvalidOid; + HeapTupleSetZeroXids(&tuple); tuple.t_data = rec; values = (Datum *) palloc(ncolumns * sizeof(Datum)); @@ -1237,3 +1242,296 @@ test_relpath(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } + +#include "access/hio.h" +#include "access/relation.h" +#include "storage/bufmgr.h" +#include "utils/rel.h" + +static void +CheckNewPage(char *msg, Page page) +{ + uint16 size; + + if (PageGetPageLayoutVersion(page) != PG_PAGE_LAYOUT_VERSION) + elog(ERROR, "%s: page version is %d, expected %d ", + msg, PageGetPageLayoutVersion(page), PG_PAGE_LAYOUT_VERSION); + + size = PageGetSpecialSize(page); + if (size == MAXALIGN(sizeof(HeapPageSpecialData))) + elog(INFO, "%s: page is converted to xid64 format", msg); + else if (HeapPageIsDoubleXmax(page)) + elog(INFO, "%s: page is converted into double xmax format", msg); + else + elog(ERROR, "%s: converted page has pageSpecial size %u, expected %llu", + msg, size, + (unsigned long long) MAXALIGN(sizeof(HeapPageSpecialData))); +} + +/* + * Get page from relation. + * Make this page look like in 32-bit xid format. + * Convert it to 64-bit xid format. + * Run basic checks. 
+ */ +PG_FUNCTION_INFO_V1(xid64_test_1); +Datum +xid64_test_1(PG_FUNCTION_ARGS) +{ + Oid relid; + Relation rel; + Buffer buf; + Page page; + PageHeader hdr; + + relid = PG_GETARG_OID(0); + rel = relation_open(relid, AccessExclusiveLock); + buf = ReadBuffer(rel, 0); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + + page = BufferGetPage(buf); + hdr = (PageHeader) page; + + if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HeapPageSpecialData))) + elog(ERROR, "page expected in new format"); + + if (PageGetPageLayoutVersion(page) != PG_PAGE_LAYOUT_VERSION) + elog(ERROR, "unknown page version (%u)", + PageGetPageLayoutVersion(page)); + + hdr->pd_special = BLCKSZ; + PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1); + hdr->pd_checksum = 0; /* turn off checksum verification performed in convert_page */ + convert_page(rel, buf, 0); + + CheckNewPage("test 1", page); + + UnlockReleaseBuffer(buf); + relation_close(rel, AccessExclusiveLock); + + PG_RETURN_VOID(); +} + +typedef struct TupleCheckValues +{ + TransactionId xmin; + TransactionId xmax; +} TupleCheckValues; + +typedef struct RelCheckValues +{ + TupleCheckValues *tcv; + Size ntuples; +} RelCheckValues; + +static RelCheckValues +FillRelCheckValues(Relation rel, Buffer buffer, Page page) +{ + RelCheckValues set; + Size n; + +#define DEFAULT_SET_SIZE 64 + n = DEFAULT_SET_SIZE; + set.ntuples = 0; + set.tcv = palloc(sizeof(set.tcv[0]) * n); + + { + OffsetNumber maxoff, + offnum; + HeapTupleHeader tuphdr; + ItemId itemid; + HeapTupleData tuple; + TransactionId xmin, + xmax; + + maxoff = PageGetMaxOffsetNumber(page); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + tuphdr = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_data = tuphdr; + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + + if (HeapPageGetSpecial(page) == heapDoubleXmaxSpecial) + { + xmin = tuphdr->t_choice.t_heap.t_xmin; +
xmax = tuphdr->t_choice.t_heap.t_xmax; + } + else + { + HeapTupleCopyXidsFromPage(buffer, &tuple, page, + IsToastRelation(rel)); + + xmin = HeapTupleGetRawXmin(&tuple); + xmax = HeapTupleGetRawXmax(&tuple); + } + + if (set.ntuples == n) + { + n *= 2; + set.tcv = repalloc(set.tcv, sizeof(set.tcv[0]) * n); + } + + set.tcv[set.ntuples].xmin = xmin; + set.tcv[set.ntuples].xmax = xmax; + set.ntuples++; + } + } + + return set; +} + +/* + * Test xmin/xmax invariant when converting page from 32-bit xid to 64-bit xid. + * + * Scenario: + * - enforce all relation pages to 32-bit xid format, discarding pd_xid_base and + * pd_multi_base + * - store all xmin/xmax in array + * - convert all the pages from relation into 64-bit xid format + * - store all new xmin/xmax in array + * - compare old and new xmin/xmax + * + * NOTE: initial xid value does not affect the test as pd_xid_base/pd_multi_base + * are discarded. + */ +PG_FUNCTION_INFO_V1(xid64_test_2); +Datum +xid64_test_2(PG_FUNCTION_ARGS) +{ + Oid relid; + Relation rel; + RelCheckValues before, + after; + BlockNumber pageno, + npages; + Size i; + + relid = PG_GETARG_OID(0); + rel = relation_open(relid, AccessExclusiveLock); + npages = RelationGetNumberOfBlocks(rel); + + for (pageno = 0; pageno != npages; ++pageno) + { + Buffer buf; + Page page; + PageHeader hdr; + + /* get page */ + buf = ReadBuffer(rel, pageno); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + page = BufferGetPage(buf); + hdr = (PageHeader) page; + + /* make page look like 32-bit xid page */ + hdr->pd_special = BLCKSZ; + PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1); + + before = FillRelCheckValues(rel, buf, page); + hdr->pd_checksum = pg_checksum_page((char *) page, pageno); + convert_page(rel, buf, pageno); + after = FillRelCheckValues(rel, buf, page); + + /* check */ + if (before.ntuples != after.ntuples) + elog(ERROR, "number of tuples must be equal"); + + for (i = 0; i != before.ntuples; ++i) + { + if (before.tcv[i].xmin != after.tcv[i].xmin &&
after.tcv[i].xmin) + elog(ERROR, "old and new xmin does not match (%llu != %llu)", + (unsigned long long) before.tcv[i].xmin, + (unsigned long long) after.tcv[i].xmin); + + if (before.tcv[i].xmax != after.tcv[i].xmax) + elog(ERROR, "old and new xmax does not match (%llu != %llu)", + (unsigned long long) before.tcv[i].xmax, + (unsigned long long) after.tcv[i].xmax); + } + + Assert(npages != 0); + pfree(before.tcv); + pfree(after.tcv); + + UnlockReleaseBuffer(buf); + } + + relation_close(rel, AccessExclusiveLock); + + PG_RETURN_VOID(); +} + +PG_FUNCTION_INFO_V1(xid64_test_double_xmax); +Datum +xid64_test_double_xmax(PG_FUNCTION_ARGS) +{ + Oid relid; + Relation rel; + BlockNumber pageno, + npages; + bool found; + + relid = PG_GETARG_OID(0); + rel = relation_open(relid, AccessExclusiveLock); + npages = RelationGetNumberOfBlocks(rel); + found = false; + + for (pageno = 0; pageno != npages; ++pageno) + { + Buffer buf; + Page page; + PageHeader hdr; + ItemId itemid; + OffsetNumber offnum; + HeapTupleHeader tuphdr; + + buf = ReadBuffer(rel, pageno); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + page = BufferGetPage(buf); + hdr = (PageHeader) page; + + if (pageno == 0) + { + itemid = PageGetItemId(page, FirstOffsetNumber); + itemid->lp_len += 16; /* Move to overlap special */ + } + + for (offnum = FirstOffsetNumber; + offnum <= PageGetMaxOffsetNumber(page); + offnum = OffsetNumberNext(offnum)) + { + itemid = PageGetItemId(page, offnum); + tuphdr = (HeapTupleHeader) PageGetItem(page, itemid); + tuphdr->t_infomask |= HEAP_XMIN_COMMITTED; + } + + hdr->pd_special = BLCKSZ; + PageSetPageSizeAndVersion(page, BLCKSZ, PG_PAGE_LAYOUT_VERSION - 1); + hdr->pd_checksum = pg_checksum_page((char *) page, pageno); + + convert_page(rel, buf, pageno); + + if (HeapPageIsDoubleXmax(page)) + { + found = true; + elog(INFO, "test double xmax: page %u is converted into double xmax format", + pageno); + } + + UnlockReleaseBuffer(buf); + } + + if (!found) + elog(ERROR, "test double xmax: failed, no 
double xmax"); + + Assert(npages != 0); + elog(INFO, "test double xmax: end"); + + relation_close(rel, AccessExclusiveLock); + + PG_RETURN_VOID(); +} diff --git a/src/test/regress/sql/indirect_toast.sql b/src/test/regress/sql/indirect_toast.sql index 3e2f6c02375..ea087b51282 100644 --- a/src/test/regress/sql/indirect_toast.sql +++ b/src/test/regress/sql/indirect_toast.sql @@ -76,7 +76,18 @@ SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest; VACUUM FREEZE indtoasttest; SELECT substring(indtoasttest::text, 1, 200) FROM indtoasttest; +create or replace function random_string(len integer) returns text as $$ +select substr((select string_agg(r,'') from (select random()::text as r from generate_series(1,(len+15)/16)) s1), 1, len); +$$ language sql; + +create table toasttest_main(t text); +alter table toasttest_main alter column t set storage main; + +insert into toasttest_main (select random_string(len) from generate_series(7000,8000) len); + DROP TABLE indtoasttest; +DROP TABLE toasttest_main; DROP FUNCTION update_using_indirect(); +DROP FUNCTION random_string(integer); RESET default_toast_compression; diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql index 2b086eeb6d7..d8bb0a9222e 100644 --- a/src/test/regress/sql/insert.sql +++ b/src/test/regress/sql/insert.sql @@ -55,7 +55,7 @@ INSERT INTO large_tuple_test (select 3, NULL); -- now this tuple won't fit on the second page, but the insert should -- still succeed by extending the relation -INSERT INTO large_tuple_test (select 4, repeat('a', 8126)); +INSERT INTO large_tuple_test (select 4, repeat('a', 8112)); DROP TABLE large_tuple_test; @@ -674,3 +674,18 @@ alter table returningwrtest2 drop c; alter table returningwrtest attach partition returningwrtest2 for values in (2); insert into returningwrtest values (2, 'foo') returning returningwrtest; drop table returningwrtest; + +-- Check for MaxHeapTupleSize +create table maxheaptuplesize_test(value text); +alter table 
maxheaptuplesize_test alter column value set storage external; +insert into maxheaptuplesize_test values (repeat('x', 8104)); +insert into maxheaptuplesize_test values (repeat('x', 8112)); +insert into maxheaptuplesize_test values (repeat('x', 8120)); +insert into maxheaptuplesize_test values (repeat('x', 8128)); +insert into maxheaptuplesize_test values (repeat('x', 8136)); +insert into maxheaptuplesize_test values (repeat('x', 8144)); +insert into maxheaptuplesize_test values (repeat('x', 8152)); +insert into maxheaptuplesize_test values (repeat('x', 8160)); +insert into maxheaptuplesize_test values (repeat('x', 8168)); +insert into maxheaptuplesize_test values (repeat('x', 8176)); +drop table maxheaptuplesize_test; diff --git a/src/test/regress/sql/select_views.sql b/src/test/regress/sql/select_views.sql index e742f136990..a94bd7259c9 100644 --- a/src/test/regress/sql/select_views.sql +++ b/src/test/regress/sql/select_views.sql @@ -3,7 +3,7 @@ -- test the views defined in CREATE_VIEWS -- -SELECT * FROM street; +SELECT * FROM street ORDER BY name COLLATE "C", thepath::text COLLATE "C", cname COLLATE "C"; SELECT name, #thepath FROM iexit ORDER BY name COLLATE "C", 2; diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql index 303f90955d1..b3aac2a74eb 100644 --- a/src/test/regress/sql/type_sanity.sql +++ b/src/test/regress/sql/type_sanity.sql @@ -22,7 +22,7 @@ WHERE t1.typnamespace = 0 OR (t1.typlen <= 0 AND t1.typlen != -1 AND t1.typlen != -2) OR (t1.typtype not in ('b', 'c', 'd', 'e', 'm', 'p', 'r')) OR NOT t1.typisdefined OR - (t1.typalign not in ('c', 's', 'i', 'd')) OR + (t1.typalign not in ('c', 's', 'i', 'd', 'x')) OR (t1.typstorage not in ('p', 'x', 'e', 'm')); -- Look for "pass by value" types that can't be passed by value. 
@@ -33,7 +33,8 @@ WHERE t1.typbyval AND (t1.typlen != 1 OR t1.typalign != 'c') AND (t1.typlen != 2 OR t1.typalign != 's') AND (t1.typlen != 4 OR t1.typalign != 'i') AND - (t1.typlen != 8 OR t1.typalign != 'd'); + (t1.typlen != 8 OR t1.typalign != 'd') AND + (t1.typlen != 8 OR t1.typalign != 'x'); -- Look for "toastable" types that aren't varlena. diff --git a/src/test/regress/sql/xid.sql b/src/test/regress/sql/xid.sql index 9f716b3653a..9b94cb9a4a8 100644 --- a/src/test/regress/sql/xid.sql +++ b/src/test/regress/sql/xid.sql @@ -19,7 +19,7 @@ select 'asdf'::xid8; -- Also try it with non-error-throwing API SELECT pg_input_is_valid('42', 'xid'); SELECT pg_input_is_valid('asdf', 'xid'); -SELECT * FROM pg_input_error_info('0xffffffffff', 'xid'); +SELECT * FROM pg_input_error_info('0xffffffffffffffffffff', 'xid'); SELECT pg_input_is_valid('42', 'xid8'); SELECT pg_input_is_valid('asdf', 'xid8'); SELECT * FROM pg_input_error_info('0xffffffffffffffffffff', 'xid8'); diff --git a/src/test/regress/sql/xid64.sql b/src/test/regress/sql/xid64.sql new file mode 100644 index 00000000000..caa97a0ed99 --- /dev/null +++ b/src/test/regress/sql/xid64.sql @@ -0,0 +1,84 @@ +--- +--- Unit test for xid64 functions +--- + +-- directory paths and dlsuffix are passed to us in environment variables +\getenv libdir PG_LIBDIR +\getenv dlsuffix PG_DLSUFFIX + +\set regresslib :libdir '/regress' :dlsuffix + +CREATE FUNCTION xid64_test_1(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_1' LANGUAGE C STRICT; +CREATE FUNCTION xid64_test_2(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_2' LANGUAGE C STRICT; +CREATE FUNCTION xid64_test_double_xmax(rel regclass) RETURNS VOID + AS :'regresslib', 'xid64_test_double_xmax' LANGUAGE C STRICT; + +--- +--- Check page consistency after conversion +--- +CREATE UNLOGGED TABLE test_xid64_table(a int); +ALTER TABLE test_xid64_table SET (autovacuum_enabled = false); +INSERT INTO test_xid64_table(a) SELECT a FROM generate_series(1, 1000) AS a; 
+SELECT xid64_test_1('test_xid64_table'); +DROP TABLE test_xid64_table; + +--- +--- Check tuples consistency after conversion +--- +CREATE UNLOGGED TABLE test_xid64_table(s serial, i int, t text); +ALTER TABLE test_xid64_table SET (autovacuum_enabled = false); + +DO $$ +BEGIN + FOR j IN 1..20 LOOP + INSERT INTO test_xid64_table(i, t) VALUES (random()::int, md5(random()::text)); + COMMIT; + END LOOP; +END $$; + +DO $$ +BEGIN + FOR j IN 1..10 LOOP + DELETE FROM test_xid64_table WHERE ctid IN (SELECT ctid FROM test_xid64_table TABLESAMPLE BERNOULLI (5)); + COMMIT; + END LOOP; +END $$; + +SELECT xid64_test_2('test_xid64_table'); +DROP TABLE test_xid64_table; + +--- +--- Check tuples consistency after conversion to double xmax (on full page) +--- +CREATE UNLOGGED TABLE test_xid64_table(i int); + +DO $$ +BEGIN + FOR j IN 1..40 LOOP + INSERT INTO test_xid64_table SELECT i FROM generate_series(1, 100) AS i; + COMMIT; + END LOOP; +END $$; + +SELECT xid64_test_2('test_xid64_table'); +DROP TABLE test_xid64_table; + +CREATE UNLOGGED TABLE test_xid64_table(i text); +INSERT INTO test_xid64_table(i) VALUES ('NNBABCDSDFGHJKLP'); + +DO $$ +BEGIN + FOR j IN 1..40 LOOP + INSERT INTO test_xid64_table(i) SELECT 'A' FROM generate_series(1, 100) AS i; + COMMIT; + END LOOP; +END $$; + +SELECT xid64_test_double_xmax('test_xid64_table'); +DROP TABLE test_xid64_table; + +DROP FUNCTION xid64_test_1(rel regclass); +DROP FUNCTION xid64_test_2(rel regclass); +DROP FUNCTION xid64_test_double_xmax(rel regclass); diff --git a/src/test/xid-64/.gitignore b/src/test/xid-64/.gitignore new file mode 100644 index 00000000000..0d41f282aa3 --- /dev/null +++ b/src/test/xid-64/.gitignore @@ -0,0 +1,8 @@ +# Generated subdirectories +/tmp_check/ +/results/ +/log/ + +# Note: regression.* are only left behind on a failure; that's why they're not ignored +#/regression.diffs +#/regression.out diff --git a/src/test/xid-64/Makefile b/src/test/xid-64/Makefile new file mode 100644 index 00000000000..3b1e50dfc0d --- 
/dev/null +++ b/src/test/xid-64/Makefile @@ -0,0 +1,22 @@ +#------------------------------------------------------------------------- +# +# Makefile for src/test/xid-64 +# +# Copyright (c) 2018, Postgres Professional +# +# src/test/xid-64/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/test/xid-64 +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +check: + $(prove_check) + +installcheck: + $(prove_installcheck) + +clean distclean maintainer-clean: + rm -rf tmp_check diff --git a/src/test/xid-64/README b/src/test/xid-64/README new file mode 100644 index 00000000000..01c0a1a1f74 --- /dev/null +++ b/src/test/xid-64/README @@ -0,0 +1,16 @@ +src/test/xid-64/README + +Regression tests for 64-bit XIDs +============================================= + +This directory contains a test suite for 64-bit xids. + +Running the tests +================= + + make check + +NOTE: This creates a temporary installation, and some tests may +create one or multiple nodes. + +NOTE: This requires the --enable-tap-tests argument to configure. 
diff --git a/src/test/xid-64/meson.build b/src/test/xid-64/meson.build new file mode 100644 index 00000000000..63a780b69e8 --- /dev/null +++ b/src/test/xid-64/meson.build @@ -0,0 +1,16 @@ +tests += { + 'name': 'xid-64', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'tap': { + 'tests': [ + 't/001_test_large_xids.pl', + 't/002_test_gucs.pl', + 't/003_test_integrity.pl', + 't/004_test_relminmxid.pl', + 't/005_stream_subxact.pl', + 't/006_zeropage.pl', + 't/007_first_multi.pl', + ], + }, +} diff --git a/src/test/xid-64/t/002_test_gucs.pl b/src/test/xid-64/t/002_test_gucs.pl new file mode 100644 index 00000000000..93413892336 --- /dev/null +++ b/src/test/xid-64/t/002_test_gucs.pl @@ -0,0 +1,79 @@ +# Tests for guc boundary values +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use bigint; + +sub command_output +{ + my ($cmd) = @_; + my ($stdout, $stderr); + print("# Running: " . join(" ", @{$cmd}) . "\n"); + my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr; + ok($result, "@$cmd exit code 0"); + is($stderr, '', "@$cmd no stderr"); + return $stdout; +} + +sub set_guc +{ + my ($node, $guc, $val) = @_; + print("SET $guc = $val\n"); + $node->safe_psql('postgres', "ALTER SYSTEM SET $guc = $val"); + $node->restart(); +} + +sub test_pgbench +{ + my ($node) = @_; + $node->command_ok( + [ qw(pgbench --progress=5 --transactions=1000 --jobs=5 --client=5) ], + 'pgbench finished without errors'); +} + +my @guc_vals = ( + [ "autovacuum_freeze_max_age", 100000, 2**63 - 1 ], + [ "autovacuum_multixact_freeze_max_age", 10000, 2**63 - 1 ], + [ "vacuum_freeze_min_age", 0, 2**63 - 1 ], + [ "vacuum_freeze_table_age", 0, 2**63 - 1 ], + [ "vacuum_multixact_freeze_min_age", 0, 2**63 - 1 ], + [ "vacuum_multixact_freeze_table_age", 0, 2**63 -1 ] +); + +my $START_VAL = 2**32; +my $MAX_VAL = 2**62; + +my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imxid = $START_VAL + int(rand($MAX_VAL 
- $START_VAL)); +my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL)); + +# Initialize master node +my $node = PostgreSQL::Test::Cluster->new('master'); +$node->init; +# Disable logging of all statements to avoid log bloat during pgbench +$node->append_conf('postgresql.conf', "log_statement = none"); +$node->start; + +# Fill the test database with the pgbench data +$node->command_ok( + [ qw(pgbench --initialize --scale=10) ], + 'pgbench finished without errors'); + +# Test all GUCs with minimum, maximum and random value inbetween +# (run pgbench for every configuration setting) +foreach my $gi (0 .. $#guc_vals) { + print($guc_vals[$gi][0]); print("\n"); + my $guc = $guc_vals[$gi][0]; + my $minval = $guc_vals[$gi][1]; + my $maxval = $guc_vals[$gi][2]; + set_guc($node, $guc, $minval); + test_pgbench($node); + set_guc($node, $guc, $maxval); + test_pgbench($node); + set_guc($node, $guc, $minval + int(rand($maxval - $minval))); + test_pgbench($node); +} + +done_testing(); diff --git a/src/test/xid-64/t/003_test_integrity.pl b/src/test/xid-64/t/003_test_integrity.pl new file mode 100644 index 00000000000..b4b49ca9c6d --- /dev/null +++ b/src/test/xid-64/t/003_test_integrity.pl @@ -0,0 +1,58 @@ +# Check integrity after dump/restore with different xids +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use File::Compare; + +my $tempdir = PostgreSQL::Test::Utils::tempdir; +use bigint; + +my $START_VAL = 2**32; +my $MAX_VAL = 2**62; + +my $ixid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imxid = $START_VAL + int(rand($MAX_VAL - $START_VAL)); +my $imoff = $START_VAL + int(rand($MAX_VAL - $START_VAL)); + +# Initialize master node +my $node = PostgreSQL::Test::Cluster->new('master'); +$node->init(); +$node->start; + +# Create a database and fill it with the pgbench data +$node->safe_psql('postgres', "CREATE DATABASE pgbench_db"); +$node->command_ok( + [ qw(pgbench --initialize --scale=2 pgbench_db) ], + 
'pgbench finished without errors'); +# Dump the database (cluster the main table to put data in a determined order) +$node->safe_psql('pgbench_db', qq( + CREATE INDEX pa_aid_idx ON pgbench_accounts (aid); + CLUSTER pgbench_accounts USING pa_aid_idx)); +$node->command_ok( + [ "pg_dump", "-w", "--inserts", "--no-statistics", "--file=$tempdir/pgbench.sql", "pgbench_db" ], + 'pgdump finished without errors'); +$node->stop('fast'); + +# Initialize second node +my $node2 = PostgreSQL::Test::Cluster->new('master2'); +$node2->init; +# Disable logging of all statements to avoid log bloat during restore +$node2->append_conf('postgresql.conf', "log_statement = none"); +$node2->start; + +# Create a database and restore the previous dump +$node2->safe_psql('postgres', "CREATE DATABASE pgbench_db"); +my $txid0 = $node2->safe_psql('pgbench_db', 'SELECT txid_current()'); +print("# Initial txid_current: $txid0\n"); +$node2->command_ok(["psql", "-q", "-f", "$tempdir/pgbench.sql", "pgbench_db"]); + +# Dump the database and compare the dumped content with the previous one +$node2->safe_psql('pgbench_db', 'CLUSTER pgbench_accounts'); +$node2->command_ok( + [ "pg_dump", "-w", "--inserts", "--no-statistics", "--file=$tempdir/pgbench2.sql", "pgbench_db" ], + 'pgdump finished without errors'); +ok(File::Compare::compare_text("$tempdir/pgbench.sql", "$tempdir/pgbench2.sql") == 0, "no differences detected"); + +done_testing(); diff --git a/src/test/xid-64/t/004_test_relminmxid.pl b/src/test/xid-64/t/004_test_relminmxid.pl new file mode 100644 index 00000000000..e924f9cd9ab --- /dev/null +++ b/src/test/xid-64/t/004_test_relminmxid.pl @@ -0,0 +1,90 @@ +# Check integrity after dump/restore with different xids +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use bigint; + +my ($node, $rmm, $vacout); +$node = PostgreSQL::Test::Cluster->new('master'); +$node->init; +$node->append_conf('postgresql.conf', 'max_prepared_transactions = 2'); 
+$node->start; + +sub relminmxid +{ + my $rmm = $node->safe_psql("postgres", qq( + SELECT relminmxid + FROM pg_class + WHERE relname = 'foo';)); + return $rmm + 0; +} + +sub vacuum +{ + my ($rc, $stdout, $stderr) = $node->psql("postgres", "VACUUM foo;"); + return $stdout.$stderr; +} + +sub gen_multixact +{ + $node->safe_psql("postgres", qq( + BEGIN; + SELECT * FROM foo FOR KEY SHARE; + PREPARE TRANSACTION 'fooshare'; + )); + + my $xmax = $node->safe_psql("postgres", qq( + SELECT xmax FROM foo; + )); + isnt($xmax + 0, 0, "xmax not empty"); + + $node->safe_psql("postgres", qq( + BEGIN; + SELECT * FROM foo FOR KEY SHARE; + COMMIT; + COMMIT PREPARED 'fooshare'; + )); + + my $mxact = $node->safe_psql("postgres", qq( + SELECT xmax FROM foo; + )); + isnt($mxact + 0, 0, "mxact not empty"); + cmp_ok($xmax, '>', $mxact, "xmax is greater than mxact"); +} + +# Initialize master node with the random xid-related parameters +$node->safe_psql("postgres", "CREATE TABLE foo (a int); INSERT INTO foo VALUES (1);"); + +is(relminmxid(), 1, "relminmxid is default"); + +vacuum(); +is(relminmxid(), 1, "relminmxid is still default"); + +gen_multixact(); +is(relminmxid(), 1, "relminmxid is still still default"); + +unlike(vacuum(), qr/multixact.*before relminmxid/, "no relminmxid error"); + +# Now intentionally break relminmxid +$node->safe_psql("postgres", qq( + UPDATE pg_class SET relminmxid = ((1::int8<<62) + 1)::text::xid + WHERE relname = 'foo' +)); +cmp_ok(relminmxid(), '>', 2**62, "relminmxid broken (intentionally)"); + +gen_multixact(); +like(vacuum(), qr/multixact.*before relminmxid/, "got relminmxid error"); +cmp_ok(relminmxid(), '>', 2**62, "relminmxid broken (still)"); + +# Fix relminmxid by setting to default +$node->safe_psql("postgres", qq( + UPDATE pg_class SET relminmxid = '1' + WHERE relname = 'foo' +)); +is(relminmxid(), 1, "relminmxid is default again"); + +unlike(vacuum(), qr/multixact.*before relminmxid/, "no relminmxid error again"); + +done_testing(); diff --git
a/src/test/xid-64/t/005_stream_subxact.pl b/src/test/xid-64/t/005_stream_subxact.pl new file mode 100644 index 00000000000..6765f6061ca --- /dev/null +++ b/src/test/xid-64/t/005_stream_subxact.pl @@ -0,0 +1,100 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Test xids streaming of large transaction containing large subtransactions +# near 32-bit boundary. +# +# Mostly it is a copy of 016_stream_subxact.pl, but with publisher xid inited +# just before 32-bit boundary, so if xids are replicated as 32-bit values, +# subscriber will get 0 xid value. +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Create publisher node +my $node_publisher = PostgreSQL::Test::Cluster->new('publisher'); +$node_publisher->init(allows_streaming => 'logical'); +$node_publisher->append_conf('postgresql.conf', + 'logical_decoding_work_mem = 64kB'); +$node_publisher->start; + +# Create subscriber node +my $node_subscriber = PostgreSQL::Test::Cluster->new('subscriber'); +$node_subscriber->init(allows_streaming => 'logical'); +$node_subscriber->start; + +# Create some preexisting content on publisher +$node_publisher->safe_psql('postgres', + "CREATE TABLE test_tab (a int primary key, b varchar)"); +$node_publisher->safe_psql('postgres', + "INSERT INTO test_tab VALUES (1, 'foo'), (2, 'bar')"); + +# Setup structure on subscriber +$node_subscriber->safe_psql('postgres', + "CREATE TABLE test_tab (a int primary key, b text, c timestamptz DEFAULT now(), d bigint DEFAULT 999)" +); + +# Setup logical replication +my $publisher_connstr = $node_publisher->connstr . 
' dbname=postgres'; +$node_publisher->safe_psql('postgres', + "CREATE PUBLICATION tap_pub FOR TABLE test_tab"); + +my $appname = 'tap_sub'; +$node_subscriber->safe_psql('postgres', + "CREATE SUBSCRIPTION tap_sub CONNECTION '$publisher_connstr application_name=$appname' PUBLICATION tap_pub WITH (streaming = on)" +); + +$node_publisher->wait_for_catchup($appname); + +# Also wait for initial table sync to finish +my $synced_query = + "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('r', 's');"; +$node_subscriber->poll_query_until('postgres', $synced_query) + or die "Timed out while waiting for subscriber to synchronize data"; + +my $result = + $node_subscriber->safe_psql('postgres', + "SELECT count(*), count(c), count(d = 999) FROM test_tab"); +is($result, qq(2|2|2), 'check initial data was copied to subscriber'); + +# Insert, update and delete enough rows to exceed 64kB limit. +$node_publisher->safe_psql( + 'postgres', q{ +BEGIN; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series( 3, 500) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +SAVEPOINT s1; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(501, 1000) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +SAVEPOINT s2; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(1001, 1500) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +SAVEPOINT s3; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(1501, 2000) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +SAVEPOINT s4; +INSERT INTO test_tab SELECT i, md5(i::text) FROM generate_series(2001, 2500) s(i); +UPDATE test_tab SET b = md5(b) WHERE mod(a,2) = 0; +DELETE FROM test_tab WHERE mod(a,3) = 0; +COMMIT; +}); + +$node_publisher->wait_for_catchup($appname); + +$result = + 
$node_subscriber->safe_psql('postgres',
+	"SELECT count(*), count(c), count(d = 999) FROM test_tab");
+is($result, qq(1667|1667|1667),
+	'check data was copied to subscriber in streaming mode and extra columns contain local defaults'
+);
+
+$node_subscriber->stop;
+$node_publisher->stop;
+
+done_testing();
diff --git a/src/test/xid-64/t/006_zeropage.pl b/src/test/xid-64/t/006_zeropage.pl
new file mode 100644
index 00000000000..4b87c90edcd
--- /dev/null
+++ b/src/test/xid-64/t/006_zeropage.pl
@@ -0,0 +1,33 @@
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Check the WAL for a ZEROPAGE record.
+
+sub command_output
+{
+	my ($cmd) = @_;
+	my ($stdout, $stderr);
+	print("# Running: " . join(" ", @{$cmd}) . "\n");
+	my $result = IPC::Run::run $cmd, '>', \$stdout, '2>', \$stderr;
+	return $stdout;
+}
+
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init;
+$node->start;
+my $pgdata = $node->data_dir;
+my $xlogfilename0 = $node->safe_psql('postgres',
+	"SELECT pg_walfile_name(pg_current_wal_lsn())");
+#$node->command_like(
+#	[ 'pg_waldump', '-S', "$pgdata/pg_wal/$xlogfilename0" ],
+#	qr/ZEROPAGE/,
+#	'pg_waldump reports a ZEROPAGE record');
+my $wd_output = command_output(
+	[ 'pg_waldump', "$pgdata/pg_wal/$xlogfilename0" ]);
+ok($wd_output =~ qr/ZEROPAGE page 0/, "ZEROPAGE found");
+
+done_testing();
diff --git a/src/test/xid-64/t/007_first_multi.pl b/src/test/xid-64/t/007_first_multi.pl
new file mode 100644
index 00000000000..eca2c39af9e
--- /dev/null
+++ b/src/test/xid-64/t/007_first_multi.pl
@@ -0,0 +1,83 @@
+# Test for pages whose first tuple has a multixact xmax
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+sub test_multixact
+{
+	my ($primary, $standby, $test_name) = @_;
+
+	$primary->safe_psql('postgres', q{
+		CREATE TABLE t (id INT, data TEXT, CONSTRAINT t_id_pk PRIMARY KEY(id));
+		INSERT INTO t SELECT 1, repeat('a', 1000);
+	});
+
+	my
%psql = (
+		'1' => $primary->background_psql('postgres'),
+		'2' => $primary->background_psql('postgres'),
+	);
+
+	# Lock the tuple FOR KEY SHARE from two concurrent sessions
+	$psql{1}->query_safe(q(
+		BEGIN;
+		SELECT * FROM t FOR KEY SHARE;
+	));
+
+	$psql{2}->query_safe(q(
+		BEGIN;
+		SELECT * FROM t FOR KEY SHARE;
+	));
+
+	# Repeat the update until a new page holding a single tuple appears
+	my $res;
+	my $guard = 0;
+
+	do {
+		$res = $primary->safe_psql('postgres', q{
+			UPDATE t SET data = repeat('a', 1000) RETURNING ctid;
+		});
+		# Fail if we have already written around 64kB and still see no new page.
+		fail("creating second page") if (++$guard == 64);
+	} until ($res eq "(1,1)");
+
+	$psql{1}->quit;
+	$psql{2}->quit;
+	$primary->wait_for_catchup($standby);
+
+	# Check that primary and standby report identical rows
+	my $query = q{
+		SELECT xmax, ctid, id, data = repeat('a', 1000) as data FROM t;
+	};
+	my $res_primary = $primary->safe_psql('postgres', $query);
+	my $res_standby = $standby->safe_psql('postgres', $query);
+
+	is($res_primary, $res_standby, "rows are the same in test $test_name");
+}
+
+# Run the test with full_page_writes both enabled and disabled.
+foreach ('true', 'false') {
+	# Create primary
+	my $primary = PostgreSQL::Test::Cluster->new("master_$_");
+	$primary->init(allows_streaming => 1);
+	$primary->append_conf('postgresql.conf', "full_page_writes = $_");
+	$primary->start;
+
+	# Take backup
+	my $backup_name = "my_backup_$_";
+	$primary->backup($backup_name);
+
+	# Create standby from backup
+	my $standby = PostgreSQL::Test::Cluster->new("standby_$_");
+	$standby->init_from_backup($primary, $backup_name, has_streaming => 1);
+	$standby->start;
+
+	# Compare primary and standby contents
+	test_multixact($primary, $standby, "with FPW $_");
+
+	$standby->stop();
+	$primary->stop();
+}
+
+done_testing();
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 3fbf5a4c212..b9381926def 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3664,8 +3664,8 @@ intset_node
 intvKEY
 io_callback_fn
 io_stat_col
-itemIdCompact
-itemIdCompactData
+ItemIdCompact
+ItemIdCompactData
 iterator
 jmp_buf
 join_search_hook_type
-- 
2.48.1