From d409230e66ac3197e5cff9346caa9a72f438373e Mon Sep 17 00:00:00 2001 From: Dilip Kumar Date: Wed, 16 Feb 2022 17:29:39 +0530 Subject: [PATCH v4 2/4] Use 56 bits for relfilenode to avoid wraparound MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As part of this patch, we will make the relfilenode 64 bits wide. But the problem is that if we make it 64 bits wide then the size of the BufferTag will be increased which will increase the memory usage and that may also impact the performance. So in order to avoid that inside the buffer tag, instead of using 64 bits for the relfilenode we will use 8 bits for the fork number and 56 bits for the relfilenode. --- .../pg_buffercache/pg_buffercache--1.0--1.1.sql | 2 +- contrib/pg_buffercache/pg_buffercache--1.2.sql | 2 +- contrib/pg_buffercache/pg_buffercache_pages.c | 8 +-- contrib/pg_prewarm/autoprewarm.c | 4 +- doc/src/sgml/catalogs.sgml | 2 +- doc/src/sgml/pgbuffercache.sgml | 2 +- src/backend/access/common/syncscan.c | 2 +- src/backend/access/gin/ginxlog.c | 2 +- src/backend/access/rmgrdesc/gistdesc.c | 2 +- src/backend/access/rmgrdesc/heapdesc.c | 2 +- src/backend/access/rmgrdesc/nbtdesc.c | 2 +- src/backend/access/rmgrdesc/seqdesc.c | 2 +- src/backend/access/rmgrdesc/xlogdesc.c | 15 +++++- src/backend/access/transam/README | 4 +- src/backend/access/transam/varsup.c | 52 +++++++++++++++++++- src/backend/access/transam/xlog.c | 37 ++++++++++++++ src/backend/access/transam/xlogrecovery.c | 6 +-- src/backend/access/transam/xlogutils.c | 8 +-- src/backend/catalog/catalog.c | 57 ++++------------------ src/backend/catalog/heap.c | 29 ++++++----- src/backend/catalog/index.c | 21 ++++---- src/backend/commands/cluster.c | 12 ++--- src/backend/commands/indexcmds.c | 6 +-- src/backend/commands/sequence.c | 2 +- src/backend/commands/tablecmds.c | 19 +++++--- src/backend/nodes/outfuncs.c | 2 +- src/backend/parser/parse_utilcmd.c | 4 +- src/backend/replication/logical/decode.c | 1 + 
src/backend/replication/logical/reorderbuffer.c | 2 +- src/backend/storage/buffer/bufmgr.c | 2 +- src/backend/storage/freespace/fsmpage.c | 2 +- src/backend/storage/lmgr/lwlocknames.txt | 1 + src/backend/storage/smgr/smgr.c | 2 +- src/backend/utils/adt/dbsize.c | 16 +++--- src/backend/utils/adt/pg_upgrade_support.c | 12 ++--- src/backend/utils/cache/relcache.c | 18 +++---- src/backend/utils/cache/relfilenodemap.c | 8 +-- src/backend/utils/cache/relmapper.c | 15 +++--- src/backend/utils/misc/pg_controldata.c | 9 +++- src/bin/pg_checksums/pg_checksums.c | 6 +-- src/bin/pg_controldata/pg_controldata.c | 2 + src/bin/pg_dump/pg_dump.c | 28 +++++------ src/bin/pg_rewind/filemap.c | 8 +-- src/bin/pg_upgrade/info.c | 4 +- src/bin/pg_upgrade/pg_upgrade.c | 6 +-- src/bin/pg_upgrade/pg_upgrade.h | 4 +- src/bin/pg_upgrade/relfilenode.c | 4 +- src/bin/pg_waldump/pg_waldump.c | 6 +-- src/common/relpath.c | 22 ++++----- src/fe_utils/option_utils.c | 42 ++++++++++++++++ src/include/access/transam.h | 4 ++ src/include/access/xlog.h | 1 + src/include/catalog/binary_upgrade.h | 6 +-- src/include/catalog/catalog.h | 4 +- src/include/catalog/heap.h | 2 +- src/include/catalog/index.h | 2 +- src/include/catalog/pg_class.h | 10 ++-- src/include/catalog/pg_control.h | 2 + src/include/catalog/pg_proc.dat | 10 ++-- src/include/commands/tablecmds.h | 2 +- src/include/common/relpath.h | 2 +- src/include/fe_utils/option_utils.h | 3 ++ src/include/nodes/parsenodes.h | 2 +- src/include/postgres_ext.h | 15 ++++++ src/include/storage/buf_internals.h | 29 ++++++++--- src/include/storage/relfilenode.h | 14 ++++-- src/include/utils/rel.h | 2 +- src/include/utils/relcache.h | 2 +- src/include/utils/relfilenodemap.h | 2 +- src/include/utils/relmapper.h | 6 +-- src/test/regress/expected/alter_table.out | 20 ++++---- src/test/regress/sql/alter_table.sql | 4 +- 72 files changed, 411 insertions(+), 259 deletions(-) diff --git a/contrib/pg_buffercache/pg_buffercache--1.0--1.1.sql 
b/contrib/pg_buffercache/pg_buffercache--1.0--1.1.sql index 54d02f5..5e93238 100644 --- a/contrib/pg_buffercache/pg_buffercache--1.0--1.1.sql +++ b/contrib/pg_buffercache/pg_buffercache--1.0--1.1.sql @@ -6,6 +6,6 @@ -- Upgrade view to 1.1. format CREATE OR REPLACE VIEW pg_buffercache AS SELECT P.* FROM pg_buffercache_pages() AS P - (bufferid integer, relfilenode oid, reltablespace oid, reldatabase oid, + (bufferid integer, relfilenode int8, reltablespace oid, reldatabase oid, relforknumber int2, relblocknumber int8, isdirty bool, usagecount int2, pinning_backends int4); diff --git a/contrib/pg_buffercache/pg_buffercache--1.2.sql b/contrib/pg_buffercache/pg_buffercache--1.2.sql index 6ee5d84..f52ddcd 100644 --- a/contrib/pg_buffercache/pg_buffercache--1.2.sql +++ b/contrib/pg_buffercache/pg_buffercache--1.2.sql @@ -12,7 +12,7 @@ LANGUAGE C PARALLEL SAFE; -- Create a view for convenient access. CREATE VIEW pg_buffercache AS SELECT P.* FROM pg_buffercache_pages() AS P - (bufferid integer, relfilenode oid, reltablespace oid, reldatabase oid, + (bufferid integer, relfilenode int8, reltablespace oid, reldatabase oid, relforknumber int2, relblocknumber int8, isdirty bool, usagecount int2, pinning_backends int4); diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c index 6af96c8..94d2570 100644 --- a/contrib/pg_buffercache/pg_buffercache_pages.c +++ b/contrib/pg_buffercache/pg_buffercache_pages.c @@ -26,7 +26,7 @@ PG_MODULE_MAGIC; typedef struct { uint32 bufferid; - Oid relfilenode; + RelNode relfilenode; Oid reltablespace; Oid reldatabase; ForkNumber forknum; @@ -103,7 +103,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS) TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid", INT4OID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode", - OIDOID, -1, 0); + INT8OID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace", OIDOID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 4, 
"reldatabase", @@ -153,7 +153,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS) buf_state = LockBufHdr(bufHdr); fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr); - fctx->record[i].relfilenode = bufHdr->tag.fileNode; + fctx->record[i].relfilenode = BufTagGetFileNode(bufHdr->tag); fctx->record[i].reltablespace = bufHdr->tag.spcOid; fctx->record[i].reldatabase = bufHdr->tag.dbOid; fctx->record[i].forknum = bufHdr->tag.forkNum; @@ -209,7 +209,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS) } else { - values[1] = ObjectIdGetDatum(fctx->record[i].relfilenode); + values[1] = Int64GetDatum(fctx->record[i].relfilenode); nulls[1] = false; values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace); nulls[2] = false; diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c index fe537e9..6899ace 100644 --- a/contrib/pg_prewarm/autoprewarm.c +++ b/contrib/pg_prewarm/autoprewarm.c @@ -62,7 +62,7 @@ typedef struct BlockInfoRecord { Oid database; Oid tablespace; - Oid filenode; + RelNode filenode; ForkNumber forknum; BlockNumber blocknum; } BlockInfoRecord; @@ -618,7 +618,7 @@ apw_dump_now(bool is_bgworker, bool dump_unlogged) { block_info_array[num_blocks].database = bufHdr->tag.dbOid; block_info_array[num_blocks].tablespace = bufHdr->tag.spcOid; - block_info_array[num_blocks].filenode = bufHdr->tag.fileNode; + block_info_array[num_blocks].filenode = BufTagGetFileNode(bufHdr->tag); block_info_array[num_blocks].forknum = bufHdr->tag.forkNum; block_info_array[num_blocks].blocknum = bufHdr->tag.blockNum; ++num_blocks; diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 5a1627a..d6e1fad 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1960,7 +1960,7 @@ SCRAM-SHA-256$<iteration count>:&l - relfilenode oid + relfilenode int8 Name of the on-disk file of this relation; zero means this diff --git a/doc/src/sgml/pgbuffercache.sgml b/doc/src/sgml/pgbuffercache.sgml index e68d159..631cd2f 100644 --- 
a/doc/src/sgml/pgbuffercache.sgml +++ b/doc/src/sgml/pgbuffercache.sgml @@ -62,7 +62,7 @@ - relfilenode oid + relfilenode int8 (references pg_class.relfilenode) diff --git a/src/backend/access/common/syncscan.c b/src/backend/access/common/syncscan.c index d5b16c5..aa71523 100644 --- a/src/backend/access/common/syncscan.c +++ b/src/backend/access/common/syncscan.c @@ -161,7 +161,7 @@ SyncScanShmemInit(void) */ item->location.relfilenode.spcNode = InvalidOid; item->location.relfilenode.dbNode = InvalidOid; - item->location.relfilenode.relNode = InvalidOid; + item->location.relfilenode.relNode = InvalidRelNode; item->location.location = InvalidBlockNumber; item->prev = (i > 0) ? diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c index 87e8366..17b77b9 100644 --- a/src/backend/access/gin/ginxlog.c +++ b/src/backend/access/gin/ginxlog.c @@ -100,7 +100,7 @@ ginRedoInsertEntry(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rda BlockNumber blknum; BufferGetTag(buffer, &node, &forknum, &blknum); - elog(ERROR, "failed to add item to index page in %u/%u/%u", + elog(ERROR, "failed to add item to index page in %u/%u/" INT64_FORMAT, node.spcNode, node.dbNode, node.relNode); } } diff --git a/src/backend/access/rmgrdesc/gistdesc.c b/src/backend/access/rmgrdesc/gistdesc.c index 9cab4fa..203685a 100644 --- a/src/backend/access/rmgrdesc/gistdesc.c +++ b/src/backend/access/rmgrdesc/gistdesc.c @@ -26,7 +26,7 @@ out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec) static void out_gistxlogPageReuse(StringInfo buf, gistxlogPageReuse *xlrec) { - appendStringInfo(buf, "rel %u/%u/%u; blk %u; latestRemovedXid %u:%u", + appendStringInfo(buf, "rel %u/%u/" INT64_FORMAT "; blk %u; latestRemovedXid %u:%u", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, xlrec->block, EpochFromFullTransactionId(xlrec->latestRemovedFullXid), diff --git a/src/backend/access/rmgrdesc/heapdesc.c b/src/backend/access/rmgrdesc/heapdesc.c index 
6238085..57af152 100644 --- a/src/backend/access/rmgrdesc/heapdesc.c +++ b/src/backend/access/rmgrdesc/heapdesc.c @@ -169,7 +169,7 @@ heap2_desc(StringInfo buf, XLogReaderState *record) { xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec; - appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u", + appendStringInfo(buf, "rel %u/%u/" INT64_FORMAT "; tid %u/%u", xlrec->target_node.spcNode, xlrec->target_node.dbNode, xlrec->target_node.relNode, diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c index dfbbf4e..8c44ebd 100644 --- a/src/backend/access/rmgrdesc/nbtdesc.c +++ b/src/backend/access/rmgrdesc/nbtdesc.c @@ -100,7 +100,7 @@ btree_desc(StringInfo buf, XLogReaderState *record) { xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) rec; - appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %u:%u", + appendStringInfo(buf, "rel %u/%u/" INT64_FORMAT "; latestRemovedXid %u:%u", xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode, EpochFromFullTransactionId(xlrec->latestRemovedFullXid), diff --git a/src/backend/access/rmgrdesc/seqdesc.c b/src/backend/access/rmgrdesc/seqdesc.c index d9b1e60..5385ded 100644 --- a/src/backend/access/rmgrdesc/seqdesc.c +++ b/src/backend/access/rmgrdesc/seqdesc.c @@ -25,7 +25,7 @@ seq_desc(StringInfo buf, XLogReaderState *record) xl_seq_rec *xlrec = (xl_seq_rec *) rec; if (info == XLOG_SEQ_LOG) - appendStringInfo(buf, "rel %u/%u/%u", + appendStringInfo(buf, "rel %u/%u/" INT64_FORMAT, xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode); } diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index e7452af..9066566 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -45,8 +45,8 @@ xlog_desc(StringInfo buf, XLogReaderState *record) CheckPoint *checkpoint = (CheckPoint *) rec; appendStringInfo(buf, "redo %X/%X; " - "tli %u; prev tli %u; fpw %s; xid %u:%u; oid %u; multi %u; offset %u; " - "oldest xid %u in DB 
%u; oldest multi %u in DB %u; " + "tli %u; prev tli %u; fpw %s; xid %u:%u; relfilenode " INT64_FORMAT "; oid %u; " + "multi %u; offset %u; oldest xid %u in DB %u; oldest multi %u in DB %u; " "oldest/newest commit timestamp xid: %u/%u; " "oldest running xid %u; %s", LSN_FORMAT_ARGS(checkpoint->redo), @@ -55,6 +55,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record) checkpoint->fullPageWrites ? "true" : "false", EpochFromFullTransactionId(checkpoint->nextXid), XidFromFullTransactionId(checkpoint->nextXid), + checkpoint->nextRelNode, checkpoint->nextOid, checkpoint->nextMulti, checkpoint->nextMultiOffset, @@ -74,6 +75,13 @@ xlog_desc(StringInfo buf, XLogReaderState *record) memcpy(&nextOid, rec, sizeof(Oid)); appendStringInfo(buf, "%u", nextOid); } + else if (info == XLOG_NEXT_RELFILENODE) + { + RelNode nextRelFilenode; + + memcpy(&nextRelFilenode, rec, sizeof(RelNode)); + appendStringInfo(buf, INT64_FORMAT, nextRelFilenode); + } else if (info == XLOG_RESTORE_POINT) { xl_restore_point *xlrec = (xl_restore_point *) rec; @@ -169,6 +177,9 @@ xlog_identify(uint8 info) case XLOG_NEXTOID: id = "NEXTOID"; break; + case XLOG_NEXT_RELFILENODE: + id = "NEXT_RELFILENODE"; + break; case XLOG_SWITCH: id = "SWITCH"; break; diff --git a/src/backend/access/transam/README b/src/backend/access/transam/README index 1edc818..5c81f6c 100644 --- a/src/backend/access/transam/README +++ b/src/backend/access/transam/README @@ -692,8 +692,8 @@ by having database restart search for files that don't have any committed entry in pg_class, but that currently isn't done because of the possibility of deleting data that is useful for forensic analysis of the crash. Orphan files are harmless --- at worst they waste a bit of disk space --- -because we check for on-disk collisions when allocating new relfilenode -OIDs. So cleaning up isn't really necessary. +because relfilenode is 56 bits wide, so logically there should not be any +collisions. So cleaning up isn't really necessary. 3. 
Deleting a table, which requires an unlink() that could fail. diff --git a/src/backend/access/transam/varsup.c b/src/backend/access/transam/varsup.c index 748120a..1361393 100644 --- a/src/backend/access/transam/varsup.c +++ b/src/backend/access/transam/varsup.c @@ -30,6 +30,9 @@ /* Number of OIDs to prefetch (preallocate) per XLOG write */ #define VAR_OID_PREFETCH 8192 +/* Number of relfilenodes to prefetch (preallocate) per XLOG write */ +#define VAR_RFN_PREFETCH 8192 + /* pointer to "variable cache" in shared memory (set up by shmem.c) */ VariableCache ShmemVariableCache = NULL; @@ -521,8 +524,7 @@ ForceTransactionIdLimitUpdate(void) * wide, counter wraparound will occur eventually, and therefore it is unwise * to assume they are unique unless precautions are taken to make them so. * Hence, this routine should generally not be used directly. The only direct - * callers should be GetNewOidWithIndex() and GetNewRelFileNode() in - * catalog/catalog.c. + * callers should be GetNewOidWithIndex() in catalog/catalog.c. */ Oid GetNewObjectId(void) @@ -613,6 +615,52 @@ SetNextObjectId(Oid nextOid) } /* + * GetNewRelNode + * + * Similar to GetNewObjectId, but generates a new relnode instead of a new Oid. + */ +RelNode +GetNewRelNode(void) +{ + RelNode result; + + /* Safety check, we should never get this far in a HS standby */ + if (RecoveryInProgress()) + elog(ERROR, "cannot assign RelNode during recovery"); + + LWLockAcquire(RelNodeGenLock, LW_EXCLUSIVE); + + /* + * Check for the wraparound for the relnode counter. + * + * XXX Actually the relnode is 56 bits wide so we don't need to worry about + * the wraparound case. 
+ */ + if (ShmemVariableCache->nextRelNode > MAX_RELFILENODE) + { + ShmemVariableCache->nextRelNode = FirstNormalRelNode; + ShmemVariableCache->relnodecount = 0; + } + + /* If we run out of logged for use RelNode then we must log more */ + if (ShmemVariableCache->relnodecount == 0) + { + XLogPutNextRelFileNode(ShmemVariableCache->nextRelNode + + VAR_RFN_PREFETCH); + + ShmemVariableCache->relnodecount = VAR_RFN_PREFETCH; + } + + result = ShmemVariableCache->nextRelNode; + (ShmemVariableCache->nextRelNode)++; + (ShmemVariableCache->relnodecount)--; + + LWLockRelease(RelNodeGenLock); + + return result; +} + +/* * StopGeneratingPinnedObjectIds * * This is called once during initdb to force the OID counter up to diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index ce78ac4..92ac7a7 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -4547,6 +4547,7 @@ BootStrapXLOG(void) checkPoint.nextXid = FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId); checkPoint.nextOid = FirstGenbkiObjectId; + checkPoint.nextRelNode = FirstNormalRelNode; checkPoint.nextMulti = FirstMultiXactId; checkPoint.nextMultiOffset = 0; checkPoint.oldestXid = FirstNormalTransactionId; @@ -4560,7 +4561,9 @@ BootStrapXLOG(void) ShmemVariableCache->nextXid = checkPoint.nextXid; ShmemVariableCache->nextOid = checkPoint.nextOid; + ShmemVariableCache->nextRelNode = checkPoint.nextRelNode; ShmemVariableCache->oidCount = 0; + ShmemVariableCache->relnodecount = 0; MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); AdvanceOldestClogXid(checkPoint.oldestXid); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); @@ -5023,7 +5026,9 @@ StartupXLOG(void) /* initialize shared memory variables from the checkpoint record */ ShmemVariableCache->nextXid = checkPoint.nextXid; ShmemVariableCache->nextOid = checkPoint.nextOid; + ShmemVariableCache->nextRelNode = checkPoint.nextRelNode; 
ShmemVariableCache->oidCount = 0; + ShmemVariableCache->relnodecount = 0; MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); AdvanceOldestClogXid(checkPoint.oldestXid); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); @@ -6454,6 +6459,12 @@ CreateCheckPoint(int flags) checkPoint.nextOid += ShmemVariableCache->oidCount; LWLockRelease(OidGenLock); + LWLockAcquire(RelNodeGenLock, LW_SHARED); + checkPoint.nextRelNode = ShmemVariableCache->nextRelNode; + if (!shutdown) + checkPoint.nextRelNode += ShmemVariableCache->relnodecount; + LWLockRelease(RelNodeGenLock); + MultiXactGetCheckptMulti(shutdown, &checkPoint.nextMulti, &checkPoint.nextMultiOffset, @@ -7308,6 +7319,18 @@ XLogPutNextOid(Oid nextOid) } /* + * Similar to XLogPutNextOid, but instead of writing a NEXTOID log record it + * writes a NEXT_RELFILENODE log record. + */ +void +XLogPutNextRelFileNode(RelNode nextrelnode) +{ + XLogBeginInsert(); + XLogRegisterData((char *) (&nextrelnode), sizeof(RelNode)); + (void) XLogInsert(RM_XLOG_ID, XLOG_NEXT_RELFILENODE); +} + +/* * Write an XLOG SWITCH record. * * Here we just blindly issue an XLogInsert request for the record. 
@@ -7522,6 +7545,16 @@ xlog_redo(XLogReaderState *record) ShmemVariableCache->oidCount = 0; LWLockRelease(OidGenLock); } + else if (info == XLOG_NEXT_RELFILENODE) + { + RelNode nextRelNode; + + memcpy(&nextRelNode, XLogRecGetData(record), sizeof(RelNode)); + LWLockAcquire(RelNodeGenLock, LW_EXCLUSIVE); + ShmemVariableCache->nextRelNode = nextRelNode; + ShmemVariableCache->relnodecount = 0; + LWLockRelease(RelNodeGenLock); + } else if (info == XLOG_CHECKPOINT_SHUTDOWN) { CheckPoint checkPoint; @@ -7536,6 +7569,10 @@ xlog_redo(XLogReaderState *record) ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->oidCount = 0; LWLockRelease(OidGenLock); + LWLockAcquire(RelNodeGenLock, LW_EXCLUSIVE); + ShmemVariableCache->nextRelNode = checkPoint.nextRelNode; + ShmemVariableCache->relnodecount = 0; + LWLockRelease(RelNodeGenLock); MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index d5269ed..57c9d75 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -2144,13 +2144,13 @@ xlog_block_info(StringInfo buf, XLogReaderState *record) XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk); if (forknum != MAIN_FORKNUM) - appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, fork %u, blk %u", + appendStringInfo(buf, "; blkref #%d: rel %u/%u/" INT64_FORMAT ", fork %u, blk %u", block_id, rnode.spcNode, rnode.dbNode, rnode.relNode, forknum, blk); else - appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, blk %u", + appendStringInfo(buf, "; blkref #%d: rel %u/%u/" INT64_FORMAT ", blk %u", block_id, rnode.spcNode, rnode.dbNode, rnode.relNode, blk); @@ -2343,7 +2343,7 @@ verifyBackupPageConsistency(XLogReaderState *record) if (memcmp(replay_image_masked, primary_image_masked, BLCKSZ) != 0) { elog(FATAL, - "inconsistent page found, rel %u/%u/%u, forknum %u, blkno %u", + "inconsistent page found, rel 
%u/%u/" INT64_FORMAT ", forknum %u, blkno %u", rnode.spcNode, rnode.dbNode, rnode.relNode, forknum, blkno); } diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index 54d5f20..f9f0aa8 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -593,17 +593,17 @@ CreateFakeRelcacheEntry(RelFileNode rnode) rel->rd_rel->relpersistence = RELPERSISTENCE_PERMANENT; /* We don't know the name of the relation; use relfilenode instead */ - sprintf(RelationGetRelationName(rel), "%u", rnode.relNode); + sprintf(RelationGetRelationName(rel), INT64_FORMAT, rnode.relNode); /* * We set up the lockRelId in case anything tries to lock the dummy - * relation. Note that this is fairly bogus since relNode may be - * different from the relation's OID. It shouldn't really matter though. + * relation. Note we are setting relId to just FirstNormalObjectId which + * is completely bogus. It shouldn't really matter though. * In recovery, we are running by ourselves and can't have any lock * conflicts. While syncing, we already hold AccessExclusiveLock. */ rel->rd_lockInfo.lockRelId.dbId = rnode.dbNode; - rel->rd_lockInfo.lockRelId.relId = rnode.relNode; + rel->rd_lockInfo.lockRelId.relId = FirstNormalObjectId; rel->rd_smgr = NULL; diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c index dfd5fb6..9bc1809 100644 --- a/src/backend/catalog/catalog.c +++ b/src/backend/catalog/catalog.c @@ -472,26 +472,16 @@ GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn) /* * GetNewRelFileNode - * Generate a new relfilenode number that is unique within the - * database of the given tablespace. + * Generate a new relfilenode number. * - * If the relfilenode will also be used as the relation's OID, pass the - * opened pg_class catalog, and this routine will guarantee that the result - * is also an unused OID within pg_class. 
If the result is to be used only - * as a relfilenode for an existing relation, pass NULL for pg_class. - * - * As with GetNewOidWithIndex(), there is some theoretical risk of a race - * condition, but it doesn't seem worth worrying about. - * - * Note: we don't support using this in bootstrap mode. All relations - * created by bootstrap have preassigned OIDs, so there's no need. + * We are using 56 bits for the relfilenode, so we expect it to be unique within + * the cluster; if the file already exists, report an error. */ -Oid -GetNewRelFileNode(Oid reltablespace, Relation pg_class, char relpersistence) +RelNode +GetNewRelFileNode(Oid reltablespace, char relpersistence) { RelFileNodeBackend rnode; char *rpath; - bool collides; BackendId backend; /* @@ -525,40 +515,14 @@ GetNewRelFileNode(Oid reltablespace, Relation pg_class, char relpersistence) * are properly detected. */ rnode.backend = backend; + rnode.node.relNode = GetNewRelNode(); - do - { - CHECK_FOR_INTERRUPTS(); - - /* Generate the OID */ - if (pg_class) - rnode.node.relNode = GetNewOidWithIndex(pg_class, ClassOidIndexId, - Anum_pg_class_oid); - else - rnode.node.relNode = GetNewObjectId(); - - /* Check for existing file of same name */ - rpath = relpath(rnode, MAIN_FORKNUM); + /* Check for existing file of same name */ + rpath = relpath(rnode, MAIN_FORKNUM); - if (access(rpath, F_OK) == 0) - { - /* definite collision */ - collides = true; - } - else - { - /* - * Here we have a little bit of a dilemma: if errno is something - * other than ENOENT, should we declare a collision and loop? In - * practice it seems best to go ahead regardless of the errno. If - * there is a colliding file we will get an smgr failure when we - * attempt to create the new relation file. 
- */ - collides = false; - } - - pfree(rpath); - } while (collides); + if (access(rpath, F_OK) == 0) + elog(ERROR, "new relfilenode file already exists: \"%s\"", rpath); + pfree(rpath); return rnode.node.relNode; } diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 7e99de8..67f3225 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -91,9 +91,9 @@ /* Potentially set by pg_upgrade_support functions */ Oid binary_upgrade_next_heap_pg_class_oid = InvalidOid; -Oid binary_upgrade_next_heap_pg_class_relfilenode = InvalidOid; +RelNode binary_upgrade_next_heap_pg_class_relfilenode = InvalidRelNode; Oid binary_upgrade_next_toast_pg_class_oid = InvalidOid; -Oid binary_upgrade_next_toast_pg_class_relfilenode = InvalidOid; +RelNode binary_upgrade_next_toast_pg_class_relfilenode = InvalidRelNode; static void AddNewRelationTuple(Relation pg_class_desc, Relation new_rel_desc, @@ -303,7 +303,7 @@ heap_create(const char *relname, Oid relnamespace, Oid reltablespace, Oid relid, - Oid relfilenode, + RelNode relfilenode, Oid accessmtd, TupleDesc tupDesc, char relkind, @@ -358,8 +358,8 @@ heap_create(const char *relname, * If relfilenode is unspecified by the caller then create storage * with oid same as relid. 
*/ - if (!OidIsValid(relfilenode)) - relfilenode = relid; + if (!RelNodeIsValid(relfilenode)) + relfilenode = GetNewRelFileNode(reltablespace, relpersistence); } /* @@ -912,7 +912,7 @@ InsertPgClassTuple(Relation pg_class_desc, values[Anum_pg_class_reloftype - 1] = ObjectIdGetDatum(rd_rel->reloftype); values[Anum_pg_class_relowner - 1] = ObjectIdGetDatum(rd_rel->relowner); values[Anum_pg_class_relam - 1] = ObjectIdGetDatum(rd_rel->relam); - values[Anum_pg_class_relfilenode - 1] = ObjectIdGetDatum(rd_rel->relfilenode); + values[Anum_pg_class_relfilenode - 1] = Int64GetDatum(rd_rel->relfilenode); values[Anum_pg_class_reltablespace - 1] = ObjectIdGetDatum(rd_rel->reltablespace); values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages); values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples); @@ -1129,7 +1129,7 @@ heap_create_with_catalog(const char *relname, Oid new_type_oid; /* By default set to InvalidOid unless overridden by binary-upgrade */ - Oid relfilenode = InvalidOid; + RelNode relfilenode = InvalidRelNode; TransactionId relfrozenxid; MultiXactId relminmxid; @@ -1187,8 +1187,7 @@ heap_create_with_catalog(const char *relname, /* * Allocate an OID for the relation, unless we were told what to use. * - * The OID will be the relfilenode as well, so make sure it doesn't - * collide with either pg_class OIDs or existing physical files. + * Make sure that the Oid doesn't collide with other pg_class OIDs. 
*/ if (!OidIsValid(relid)) { @@ -1210,13 +1209,13 @@ heap_create_with_catalog(const char *relname, relid = binary_upgrade_next_toast_pg_class_oid; binary_upgrade_next_toast_pg_class_oid = InvalidOid; - if (!OidIsValid(binary_upgrade_next_toast_pg_class_relfilenode)) + if (!RelNodeIsValid(binary_upgrade_next_toast_pg_class_relfilenode)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("toast relfilenode value not set when in binary upgrade mode"))); relfilenode = binary_upgrade_next_toast_pg_class_relfilenode; - binary_upgrade_next_toast_pg_class_relfilenode = InvalidOid; + binary_upgrade_next_toast_pg_class_relfilenode = InvalidRelNode; } } else @@ -1231,20 +1230,20 @@ heap_create_with_catalog(const char *relname, if (RELKIND_HAS_STORAGE(relkind)) { - if (!OidIsValid(binary_upgrade_next_heap_pg_class_relfilenode)) + if (!RelNodeIsValid(binary_upgrade_next_heap_pg_class_relfilenode)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("relfilenode value not set when in binary upgrade mode"))); relfilenode = binary_upgrade_next_heap_pg_class_relfilenode; - binary_upgrade_next_heap_pg_class_relfilenode = InvalidOid; + binary_upgrade_next_heap_pg_class_relfilenode = InvalidRelNode; } } } if (!OidIsValid(relid)) - relid = GetNewRelFileNode(reltablespace, pg_class_desc, - relpersistence); + relid = GetNewOidWithIndex(pg_class_desc, ClassOidIndexId, + Anum_pg_class_oid); } /* diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 5e3fc2b..7a19f45 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -87,7 +87,7 @@ /* Potentially set by pg_upgrade_support functions */ Oid binary_upgrade_next_index_pg_class_oid = InvalidOid; -Oid binary_upgrade_next_index_pg_class_relfilenode = InvalidOid; +RelNode binary_upgrade_next_index_pg_class_relfilenode = InvalidRelNode; /* * Pointer-free representation of variables used when reindexing system @@ -662,7 +662,7 @@ UpdateIndexRelation(Oid indexoid, * parent 
index; otherwise InvalidOid. * parentConstraintId: if creating a constraint on a partition, the OID * of the constraint in the parent; otherwise InvalidOid. - * relFileNode: normally, pass InvalidOid to get new storage. May be + * relFileNode: normally, pass InvalidRelNode to get new storage. May be * nonzero to attach an existing valid build. * indexInfo: same info executor uses to insert into the index * indexColNames: column names to use for index (List of char *) @@ -703,7 +703,7 @@ index_create(Relation heapRelation, Oid indexRelationId, Oid parentIndexRelid, Oid parentConstraintId, - Oid relFileNode, + RelNode relFileNode, IndexInfo *indexInfo, List *indexColNames, Oid accessMethodObjectId, @@ -735,7 +735,7 @@ index_create(Relation heapRelation, char relkind; TransactionId relfrozenxid; MultiXactId relminmxid; - bool create_storage = !OidIsValid(relFileNode); + bool create_storage = !RelNodeIsValid(relFileNode); /* constraint flags can only be set when a constraint is requested */ Assert((constr_flags == 0) || @@ -902,8 +902,7 @@ index_create(Relation heapRelation, /* * Allocate an OID for the index, unless we were told what to use. * - * The OID will be the relfilenode as well, so make sure it doesn't - * collide with either pg_class OIDs or existing physical files. + * Make sure it doesn't collide with other pg_class OIDs. 
*/ if (!OidIsValid(indexRelationId)) { @@ -920,12 +919,12 @@ index_create(Relation heapRelation, /* Overide the index relfilenode */ if ((relkind == RELKIND_INDEX) && - (!OidIsValid(binary_upgrade_next_index_pg_class_relfilenode))) + (!RelNodeIsValid(binary_upgrade_next_index_pg_class_relfilenode))) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("index relfilenode value not set when in binary upgrade mode"))); relFileNode = binary_upgrade_next_index_pg_class_relfilenode; - binary_upgrade_next_index_pg_class_relfilenode = InvalidOid; + binary_upgrade_next_index_pg_class_relfilenode = InvalidRelNode; /* * Note that we want create_storage = true for binary upgrade. @@ -936,8 +935,8 @@ index_create(Relation heapRelation, } else { - indexRelationId = - GetNewRelFileNode(tableSpaceId, pg_class, relpersistence); + indexRelationId = GetNewOidWithIndex(pg_class, ClassOidIndexId, + Anum_pg_class_oid); } } @@ -1408,7 +1407,7 @@ index_concurrently_create_copy(Relation heapRelation, Oid oldIndexId, InvalidOid, /* indexRelationId */ InvalidOid, /* parentIndexRelid */ InvalidOid, /* parentConstraintId */ - InvalidOid, /* relFileNode */ + InvalidRelNode, /* relFileNode */ newInfo, indexColNames, indexRelation->rd_rel->relam, diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 02a7e94..0cee4c6 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -1005,9 +1005,9 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, reltup2; Form_pg_class relform1, relform2; - Oid relfilenode1, + RelNode relfilenode1, relfilenode2; - Oid swaptemp; + RelNode swaptemp; char swptmpchr; /* We need writable copies of both pg_class tuples. 
*/ @@ -1026,7 +1026,7 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, relfilenode1 = relform1->relfilenode; relfilenode2 = relform2->relfilenode; - if (OidIsValid(relfilenode1) && OidIsValid(relfilenode2)) + if (RelNodeIsValid(relfilenode1) && RelNodeIsValid(relfilenode2)) { /* * Normal non-mapped relations: swap relfilenodes, reltablespaces, @@ -1064,7 +1064,7 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, * Mapped-relation case. Here we have to swap the relation mappings * instead of modifying the pg_class columns. Both must be mapped. */ - if (OidIsValid(relfilenode1) || OidIsValid(relfilenode2)) + if (RelNodeIsValid(relfilenode1) || RelNodeIsValid(relfilenode2)) elog(ERROR, "cannot swap mapped relation \"%s\" with non-mapped relation", NameStr(relform1->relname)); @@ -1093,11 +1093,11 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class, * Fetch the mappings --- shouldn't fail, but be paranoid */ relfilenode1 = RelationMapOidToFilenode(r1, relform1->relisshared); - if (!OidIsValid(relfilenode1)) + if (!RelNodeIsValid(relfilenode1)) elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u", NameStr(relform1->relname), r1); relfilenode2 = RelationMapOidToFilenode(r2, relform2->relisshared); - if (!OidIsValid(relfilenode2)) + if (!RelNodeIsValid(relfilenode2)) elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u", NameStr(relform2->relname), r2); diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 560dcc8..9ac827c 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -1086,7 +1086,7 @@ DefineIndex(Oid relationId, * A valid stmt->oldNode implies that we already have a built form of the * index. The caller should also decline any index build. 
*/ - Assert(!OidIsValid(stmt->oldNode) || (skip_build && !concurrent)); + Assert(!RelNodeIsValid(stmt->oldNode) || (skip_build && !concurrent)); /* * Make the catalog entries for the index, including constraints. This @@ -1316,7 +1316,7 @@ DefineIndex(Oid relationId, childStmt->idxname = NULL; childStmt->relation = NULL; childStmt->indexOid = InvalidOid; - childStmt->oldNode = InvalidOid; + childStmt->oldNode = InvalidRelNode; childStmt->oldCreateSubid = InvalidSubTransactionId; childStmt->oldFirstRelfilenodeSubid = InvalidSubTransactionId; @@ -2897,7 +2897,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind, * particular this eliminates all shared catalogs.). */ if (RELKIND_HAS_STORAGE(classtuple->relkind) && - !OidIsValid(classtuple->relfilenode)) + !RelNodeIsValid(classtuple->relfilenode)) skip_rel = true; /* diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index ab592ce..aafca83 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -74,7 +74,7 @@ typedef struct sequence_magic typedef struct SeqTableData { Oid relid; /* pg_class OID of this sequence (hash key) */ - Oid filenode; /* last seen relfilenode of this sequence */ + RelNode filenode; /* last seen relfilenode of this sequence */ LocalTransactionId lxid; /* xact in which we last did a seq op */ bool last_valid; /* do we have a valid "last" value? */ int64 last; /* value last returned by nextval */ diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 3e83f37..3f17f7d 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -3305,7 +3305,7 @@ CheckRelationTableSpaceMove(Relation rel, Oid newTableSpaceId) void SetRelationTableSpace(Relation rel, Oid newTableSpaceId, - Oid newRelFileNode) + RelNode newRelFileNode) { Relation pg_class; HeapTuple tuple; @@ -3325,7 +3325,7 @@ SetRelationTableSpace(Relation rel, /* Update the pg_class row. 
*/ rd_rel->reltablespace = (newTableSpaceId == MyDatabaseTableSpace) ? InvalidOid : newTableSpaceId; - if (OidIsValid(newRelFileNode)) + if (RelNodeIsValid(newRelFileNode)) rd_rel->relfilenode = newRelFileNode; CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); @@ -8573,7 +8573,7 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel, /* suppress schema rights check when rebuilding existing index */ check_rights = !is_rebuild; /* skip index build if phase 3 will do it or we're reusing an old one */ - skip_build = tab->rewrite > 0 || OidIsValid(stmt->oldNode); + skip_build = tab->rewrite > 0 || RelNodeIsValid(stmt->oldNode); /* suppress notices when rebuilding existing index */ quiet = is_rebuild; @@ -8597,7 +8597,7 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel, * DROP of the old edition of this index will have scheduled the storage * for deletion at commit, so cancel that pending deletion. */ - if (OidIsValid(stmt->oldNode)) + if (RelNodeIsValid(stmt->oldNode)) { Relation irel = index_open(address.objectId, NoLock); @@ -14291,7 +14291,7 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) { Relation rel; Oid reltoastrelid; - Oid newrelfilenode; + RelNode newrelfilenode; RelFileNode newrnode; List *reltoastidxids = NIL; ListCell *lc; @@ -14321,10 +14321,13 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) } /* - * Relfilenodes are not unique in databases across tablespaces, so we need - * to allocate a new one in the new tablespace. + * Generate a new relfilenode. Although relfilenodes are unique within a + * cluster, we are unable to use the old relfilenode since unused + * relfilenodes are not unlinked until commit. So if within a transaction, + * if we set the old tablespace again, we will get conflicting relfilenode + * file. 
*/ - newrelfilenode = GetNewRelFileNode(newTableSpace, NULL, + newrelfilenode = GetNewRelFileNode(newTableSpace, rel->rd_rel->relpersistence); /* Open old and new relation */ diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 6bdad46..7372fc0 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -2771,7 +2771,7 @@ _outIndexStmt(StringInfo str, const IndexStmt *node) WRITE_NODE_FIELD(excludeOpNames); WRITE_STRING_FIELD(idxcomment); WRITE_OID_FIELD(indexOid); - WRITE_OID_FIELD(oldNode); + WRITE_UINT64_FIELD(oldNode); WRITE_UINT_FIELD(oldCreateSubid); WRITE_UINT_FIELD(oldFirstRelfilenodeSubid); WRITE_BOOL_FIELD(unique); diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 99efa26..4b6b2ca 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -1577,7 +1577,7 @@ generateClonedIndexStmt(RangeVar *heapRel, Relation source_idx, index->excludeOpNames = NIL; index->idxcomment = NULL; index->indexOid = InvalidOid; - index->oldNode = InvalidOid; + index->oldNode = InvalidRelNode; index->oldCreateSubid = InvalidSubTransactionId; index->oldFirstRelfilenodeSubid = InvalidSubTransactionId; index->unique = idxrec->indisunique; @@ -2200,7 +2200,7 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt) index->excludeOpNames = NIL; index->idxcomment = NULL; index->indexOid = InvalidOid; - index->oldNode = InvalidOid; + index->oldNode = InvalidRelNode; index->oldCreateSubid = InvalidSubTransactionId; index->oldFirstRelfilenodeSubid = InvalidSubTransactionId; index->transformed = false; diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c index 18cf931..ffd89b6 100644 --- a/src/backend/replication/logical/decode.c +++ b/src/backend/replication/logical/decode.c @@ -156,6 +156,7 @@ xlog_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) break; case XLOG_NOOP: case XLOG_NEXTOID: + case 
XLOG_NEXT_RELFILENODE: case XLOG_SWITCH: case XLOG_BACKUP_END: case XLOG_PARAMETER_CHANGE: diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index c2d9be8..8b228b8 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -5268,7 +5268,7 @@ DisplayMapping(HTAB *tuplecid_data) hash_seq_init(&hstat, tuplecid_data); while ((ent = (ReorderBufferTupleCidEnt *) hash_seq_search(&hstat)) != NULL) { - elog(DEBUG3, "mapping: node: %u/%u/%u tid: %u/%u cmin: %u, cmax: %u", + elog(DEBUG3, "mapping: node: %u/%u/" INT64_FORMAT " tid: %u/%u cmin: %u, cmax: %u", ent->key.relnode.dbNode, ent->key.relnode.spcNode, ent->key.relnode.relNode, diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 5014fe6..42f551d 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -1994,7 +1994,7 @@ BufferSync(int flags) item = &CkptBufferIds[num_to_scan++]; item->buf_id = buf_id; item->tsId = bufHdr->tag.spcOid; - item->relNode = bufHdr->tag.fileNode; + item->relNode = BufTagGetFileNode(bufHdr->tag); item->forkNum = bufHdr->tag.forkNum; item->blockNum = bufHdr->tag.blockNum; } diff --git a/src/backend/storage/freespace/fsmpage.c b/src/backend/storage/freespace/fsmpage.c index d165b35..3c0c88d 100644 --- a/src/backend/storage/freespace/fsmpage.c +++ b/src/backend/storage/freespace/fsmpage.c @@ -273,7 +273,7 @@ restart: BlockNumber blknum; BufferGetTag(buf, &rnode, &forknum, &blknum); - elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/%u", + elog(DEBUG1, "fixing corrupt FSM block %u, relation %u/%u/" INT64_FORMAT, blknum, rnode.spcNode, rnode.dbNode, rnode.relNode); /* make sure we hold an exclusive lock */ diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt index 6c7cf6c..1eb6d78 100644 --- a/src/backend/storage/lmgr/lwlocknames.txt +++
b/src/backend/storage/lmgr/lwlocknames.txt @@ -53,3 +53,4 @@ XactTruncationLock 44 # 45 was XactTruncationLock until removal of BackendRandomLock WrapLimitsVacuumLock 46 NotifyQueueTailLock 47 +RelNodeGenLock 48 diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index d71a557..a550823 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -156,7 +156,7 @@ smgropen(RelFileNode rnode, BackendId backend) /* First time through: initialize the hash table */ HASHCTL ctl; - ctl.keysize = sizeof(RelFileNodeBackend); + ctl.keysize = SizeOfRelFileNodeBackend; ctl.entrysize = sizeof(SMgrRelationData); SMgrRelationHash = hash_create("smgr relation table", 400, &ctl, HASH_ELEM | HASH_BLOBS); diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c index 3a2f2e1..9a8d6a5 100644 --- a/src/backend/utils/adt/dbsize.c +++ b/src/backend/utils/adt/dbsize.c @@ -850,7 +850,7 @@ Datum pg_relation_filenode(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); - Oid result; + RelNode result; HeapTuple tuple; Form_pg_class relform; @@ -870,15 +870,15 @@ pg_relation_filenode(PG_FUNCTION_ARGS) else { /* no storage, return NULL */ - result = InvalidOid; + result = InvalidRelNode; } ReleaseSysCache(tuple); - if (!OidIsValid(result)) + if (!RelNodeIsValid(result)) PG_RETURN_NULL(); - PG_RETURN_OID(result); + PG_RETURN_INT64(result); } /* @@ -898,11 +898,11 @@ Datum pg_filenode_relation(PG_FUNCTION_ARGS) { Oid reltablespace = PG_GETARG_OID(0); - Oid relfilenode = PG_GETARG_OID(1); + RelNode relfilenode = PG_GETARG_INT64(1); Oid heaprel; /* test needed so RelidByRelfilenode doesn't misbehave */ - if (!OidIsValid(relfilenode)) + if (!RelNodeIsValid(relfilenode)) PG_RETURN_NULL(); heaprel = RelidByRelfilenode(reltablespace, relfilenode); @@ -953,13 +953,13 @@ pg_relation_filepath(PG_FUNCTION_ARGS) else { /* no storage, return NULL */ - rnode.relNode = InvalidOid; + rnode.relNode = InvalidRelNode;
/* some compilers generate warnings without these next two lines */ rnode.dbNode = InvalidOid; rnode.spcNode = InvalidOid; } - if (!OidIsValid(rnode.relNode)) + if (!RelNodeIsValid(rnode.relNode)) { ReleaseSysCache(tuple); PG_RETURN_NULL(); diff --git a/src/backend/utils/adt/pg_upgrade_support.c b/src/backend/utils/adt/pg_upgrade_support.c index 67b9675e..568ff1f 100644 --- a/src/backend/utils/adt/pg_upgrade_support.c +++ b/src/backend/utils/adt/pg_upgrade_support.c @@ -98,10 +98,10 @@ binary_upgrade_set_next_heap_pg_class_oid(PG_FUNCTION_ARGS) Datum binary_upgrade_set_next_heap_relfilenode(PG_FUNCTION_ARGS) { - Oid nodeoid = PG_GETARG_OID(0); + RelNode relnode = PG_GETARG_INT64(0); CHECK_IS_BINARY_UPGRADE; - binary_upgrade_next_heap_pg_class_relfilenode = nodeoid; + binary_upgrade_next_heap_pg_class_relfilenode = relnode; PG_RETURN_VOID(); } @@ -120,10 +120,10 @@ binary_upgrade_set_next_index_pg_class_oid(PG_FUNCTION_ARGS) Datum binary_upgrade_set_next_index_relfilenode(PG_FUNCTION_ARGS) { - Oid nodeoid = PG_GETARG_OID(0); + RelNode relnode = PG_GETARG_INT64(0); CHECK_IS_BINARY_UPGRADE; - binary_upgrade_next_index_pg_class_relfilenode = nodeoid; + binary_upgrade_next_index_pg_class_relfilenode = relnode; PG_RETURN_VOID(); } @@ -142,10 +142,10 @@ binary_upgrade_set_next_toast_pg_class_oid(PG_FUNCTION_ARGS) Datum binary_upgrade_set_next_toast_relfilenode(PG_FUNCTION_ARGS) { - Oid nodeoid = PG_GETARG_OID(0); + RelNode relnode = PG_GETARG_INT64(0); CHECK_IS_BINARY_UPGRADE; - binary_upgrade_next_toast_pg_class_relfilenode = nodeoid; + binary_upgrade_next_toast_pg_class_relfilenode = relnode; PG_RETURN_VOID(); } diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 2707fed..515bd44 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1343,7 +1343,7 @@ RelationInitPhysicalAddr(Relation relation) relation->rd_node.relNode = RelationMapOidToFilenode(relation->rd_id, relation->rd_rel->relisshared); 
- if (!OidIsValid(relation->rd_node.relNode)) + if (!RelNodeIsValid(relation->rd_node.relNode)) elog(ERROR, "could not find relation mapping for relation \"%s\", OID %u", RelationGetRelationName(relation), relation->rd_id); } @@ -1958,13 +1958,13 @@ formrdesc(const char *relationName, Oid relationReltype, /* * All relations made with formrdesc are mapped. This is necessarily so * because there is no other way to know what filenode they currently - * have. In bootstrap mode, add them to the initial relation mapper data, - * specifying that the initial filenode is the same as the OID. + * have. In bootstrap mode, generate a new relfilenode and add them to the + * initial relation mapper data. */ - relation->rd_rel->relfilenode = InvalidOid; + relation->rd_rel->relfilenode = InvalidRelNode; if (IsBootstrapProcessingMode()) RelationMapUpdateMap(RelationGetRelid(relation), - RelationGetRelid(relation), + GetNewRelNode(), isshared, true); /* @@ -3434,7 +3434,7 @@ RelationBuildLocalRelation(const char *relname, TupleDesc tupDesc, Oid relid, Oid accessmtd, - Oid relfilenode, + RelNode relfilenode, Oid reltablespace, bool shared_relation, bool mapped_relation, @@ -3605,7 +3605,7 @@ RelationBuildLocalRelation(const char *relname, if (mapped_relation) { - rel->rd_rel->relfilenode = InvalidOid; + rel->rd_rel->relfilenode = InvalidRelNode; /* Add it to the active mapping information */ RelationMapUpdateMap(relid, relfilenode, shared_relation, true); } @@ -3674,7 +3674,7 @@ RelationBuildLocalRelation(const char *relname, void RelationSetNewRelfilenode(Relation relation, char persistence) { - Oid newrelfilenode; + RelNode newrelfilenode; Relation pg_class; HeapTuple tuple; Form_pg_class classform; @@ -3683,7 +3683,7 @@ RelationSetNewRelfilenode(Relation relation, char persistence) RelFileNode newrnode; /* Allocate a new relfilenode */ - newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL, + newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, 
persistence); /* diff --git a/src/backend/utils/cache/relfilenodemap.c b/src/backend/utils/cache/relfilenodemap.c index 70c323c..4d3e068 100644 --- a/src/backend/utils/cache/relfilenodemap.c +++ b/src/backend/utils/cache/relfilenodemap.c @@ -37,7 +37,7 @@ static ScanKeyData relfilenode_skey[2]; typedef struct { Oid reltablespace; - Oid relfilenode; + RelNode relfilenode; } RelfilenodeMapKey; typedef struct @@ -135,7 +135,7 @@ InitializeRelfilenodeMap(void) * Returns InvalidOid if no relation matching the criteria could be found. */ Oid -RelidByRelfilenode(Oid reltablespace, Oid relfilenode) +RelidByRelfilenode(Oid reltablespace, RelNode relfilenode) { RelfilenodeMapKey key; RelfilenodeMapEntry *entry; @@ -196,7 +196,7 @@ RelidByRelfilenode(Oid reltablespace, Oid relfilenode) /* set scan arguments */ skey[0].sk_argument = ObjectIdGetDatum(reltablespace); - skey[1].sk_argument = ObjectIdGetDatum(relfilenode); + skey[1].sk_argument = Int64GetDatum(relfilenode); scandesc = systable_beginscan(relation, ClassTblspcRelfilenodeIndexId, @@ -213,7 +213,7 @@ RelidByRelfilenode(Oid reltablespace, Oid relfilenode) if (found) elog(ERROR, - "unexpected duplicate for tablespace %u, relfilenode %u", + "unexpected duplicate for tablespace %u, relfilenode " INT64_FORMAT, reltablespace, relfilenode); found = true; diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c index 4f6811f..1a637b0 100644 --- a/src/backend/utils/cache/relmapper.c +++ b/src/backend/utils/cache/relmapper.c @@ -79,7 +79,7 @@ typedef struct RelMapping { Oid mapoid; /* OID of a catalog */ - Oid mapfilenode; /* its filenode number */ + RelNode mapfilenode; /* its filenode number */ } RelMapping; typedef struct RelMapFile @@ -132,7 +132,7 @@ static RelMapFile pending_local_updates; /* non-export function prototypes */ -static void apply_map_update(RelMapFile *map, Oid relationId, Oid fileNode, +static void apply_map_update(RelMapFile *map, Oid relationId, RelNode fileNode, bool
add_okay); static void merge_map_updates(RelMapFile *map, const RelMapFile *updates, bool add_okay); @@ -155,7 +155,7 @@ static void perform_relmap_update(bool shared, const RelMapFile *updates); * Returns InvalidOid if the OID is not known (which should never happen, * but the caller is in a better position to report a meaningful error). */ -Oid +RelNode RelationMapOidToFilenode(Oid relationId, bool shared) { const RelMapFile *map; @@ -193,7 +193,7 @@ RelationMapOidToFilenode(Oid relationId, bool shared) } } - return InvalidOid; + return InvalidRelNode; } /* @@ -209,7 +209,7 @@ RelationMapOidToFilenode(Oid relationId, bool shared) * relfilenode doesn't pertain to a mapped relation. */ Oid -RelationMapFilenodeToOid(Oid filenode, bool shared) +RelationMapFilenodeToOid(RelNode filenode, bool shared) { const RelMapFile *map; int32 i; @@ -258,7 +258,7 @@ RelationMapFilenodeToOid(Oid filenode, bool shared) * immediately. Otherwise it is made pending until CommandCounterIncrement. */ void -RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared, +RelationMapUpdateMap(Oid relationId, RelNode fileNode, bool shared, bool immediate) { RelMapFile *map; @@ -316,7 +316,8 @@ RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared, * add_okay = false to draw an error if not. 
*/ static void -apply_map_update(RelMapFile *map, Oid relationId, Oid fileNode, bool add_okay) +apply_map_update(RelMapFile *map, Oid relationId, RelNode fileNode, + bool add_okay) { int32 i; diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c index 781f8b8..85ed88c 100644 --- a/src/backend/utils/misc/pg_controldata.c +++ b/src/backend/utils/misc/pg_controldata.c @@ -79,8 +79,8 @@ pg_control_system(PG_FUNCTION_ARGS) Datum pg_control_checkpoint(PG_FUNCTION_ARGS) { - Datum values[18]; - bool nulls[18]; + Datum values[19]; + bool nulls[19]; TupleDesc tupdesc; HeapTuple htup; ControlFileData *ControlFile; @@ -129,6 +129,8 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 18, "checkpoint_time", TIMESTAMPTZOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 19, "next_relfilenode", + INT8OID, -1, 0); tupdesc = BlessTupleDesc(tupdesc); /* Read the control file. */ @@ -202,6 +204,9 @@ pg_control_checkpoint(PG_FUNCTION_ARGS) values[17] = TimestampTzGetDatum(time_t_to_timestamptz(ControlFile->checkPointCopy.time)); nulls[17] = false; + values[18] = Int64GetDatum(ControlFile->checkPointCopy.nextRelNode); + nulls[18] = false; + htup = heap_form_tuple(tupdesc, values, nulls); PG_RETURN_DATUM(HeapTupleGetDatum(htup)); diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c index 7e69475..94ec594 100644 --- a/src/bin/pg_checksums/pg_checksums.c +++ b/src/bin/pg_checksums/pg_checksums.c @@ -520,9 +520,9 @@ main(int argc, char *argv[]) mode = PG_MODE_ENABLE; break; case 'f': - if (!option_parse_int(optarg, "-f/--filenode", 0, - INT_MAX, - NULL)) + if (!option_parse_int64(optarg, "-f/--filenode", 0, + LLONG_MAX, + NULL)) exit(1); only_filenode = pstrdup(optarg); break; diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index f911f98..2513fc3 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ 
b/src/bin/pg_controldata/pg_controldata.c @@ -250,6 +250,8 @@ main(int argc, char *argv[]) printf(_("Latest checkpoint's NextXID: %u:%u\n"), EpochFromFullTransactionId(ControlFile->checkPointCopy.nextXid), XidFromFullTransactionId(ControlFile->checkPointCopy.nextXid)); + printf(_("Latest checkpoint's NextRelFileNode: " INT64_FORMAT "\n"), + ControlFile->checkPointCopy.nextRelNode); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile->checkPointCopy.nextOid); printf(_("Latest checkpoint's NextMultiXactId: %u\n"), diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 4485ea8..d1b0eb9 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -4658,12 +4658,12 @@ binary_upgrade_set_pg_class_oids(Archive *fout, { PQExpBuffer upgrade_query = createPQExpBuffer(); PGresult *upgrade_res; - Oid relfilenode; + RelNode relfilenode; Oid toast_oid; - Oid toast_relfilenode; + RelNode toast_relfilenode; char relkind; Oid toast_index_oid; - Oid toast_index_relfilenode; + RelNode toast_index_relfilenode; /* * Preserve the OID and relfilenode of the table, table's index, table's @@ -4689,16 +4689,16 @@ binary_upgrade_set_pg_class_oids(Archive *fout, relkind = *PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "relkind")); - relfilenode = atooid(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "relfilenode"))); + relfilenode = atorelnode(PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "relfilenode"))); toast_oid = atooid(PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "reltoastrelid"))); - toast_relfilenode = atooid(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "toast_relfilenode"))); + toast_relfilenode = atorelnode(PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "toast_relfilenode"))); toast_index_oid = atooid(PQgetvalue(upgrade_res, 0, PQfnumber(upgrade_res, "indexrelid"))); - toast_index_relfilenode = atooid(PQgetvalue(upgrade_res, 0, - PQfnumber(upgrade_res, "toast_index_relfilenode"))); + toast_index_relfilenode = 
atorelnode(PQgetvalue(upgrade_res, 0, + PQfnumber(upgrade_res, "toast_index_relfilenode"))); appendPQExpBufferStr(upgrade_buffer, "\n-- For binary upgrade, must preserve pg_class oids and relfilenodes\n"); @@ -4714,9 +4714,9 @@ binary_upgrade_set_pg_class_oids(Archive *fout, * partitioned tables have a relfilenode, which should not be preserved * when upgrading. */ - if (OidIsValid(relfilenode) && relkind != RELKIND_PARTITIONED_TABLE) + if (RelNodeIsValid(relfilenode) && relkind != RELKIND_PARTITIONED_TABLE) appendPQExpBuffer(upgrade_buffer, - "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n", + "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('" INT64_FORMAT "'::pg_catalog.int8);\n", relfilenode); /* @@ -4730,7 +4730,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout, "SELECT pg_catalog.binary_upgrade_set_next_toast_pg_class_oid('%u'::pg_catalog.oid);\n", toast_oid); appendPQExpBuffer(upgrade_buffer, - "SELECT pg_catalog.binary_upgrade_set_next_toast_relfilenode('%u'::pg_catalog.oid);\n", + "SELECT pg_catalog.binary_upgrade_set_next_toast_relfilenode('" INT64_FORMAT "'::pg_catalog.int8);\n", toast_relfilenode); /* every toast table has an index */ @@ -4738,7 +4738,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout, "SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n", toast_index_oid); appendPQExpBuffer(upgrade_buffer, - "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n", + "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('" INT64_FORMAT "'::pg_catalog.int8);\n", toast_index_relfilenode); } @@ -4751,7 +4751,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout, "SELECT pg_catalog.binary_upgrade_set_next_index_pg_class_oid('%u'::pg_catalog.oid);\n", pg_class_oid); appendPQExpBuffer(upgrade_buffer, - "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n", + "SELECT 
pg_catalog.binary_upgrade_set_next_index_relfilenode('" INT64_FORMAT "'::pg_catalog.int8);\n", relfilenode); } diff --git a/src/bin/pg_rewind/filemap.c b/src/bin/pg_rewind/filemap.c index 7211090..2674b00 100644 --- a/src/bin/pg_rewind/filemap.c +++ b/src/bin/pg_rewind/filemap.c @@ -535,11 +535,11 @@ isRelDataFile(const char *path) */ rnode.spcNode = InvalidOid; rnode.dbNode = InvalidOid; - rnode.relNode = InvalidOid; + rnode.relNode = InvalidRelNode; segNo = 0; matched = false; - nmatch = sscanf(path, "global/%u.%u", &rnode.relNode, &segNo); + nmatch = sscanf(path, "global/" INT64_FORMAT ".%u", &rnode.relNode, &segNo); if (nmatch == 1 || nmatch == 2) { rnode.spcNode = GLOBALTABLESPACE_OID; @@ -548,7 +548,7 @@ isRelDataFile(const char *path) } else { - nmatch = sscanf(path, "base/%u/%u.%u", + nmatch = sscanf(path, "base/%u/" INT64_FORMAT ".%u", &rnode.dbNode, &rnode.relNode, &segNo); if (nmatch == 2 || nmatch == 3) { @@ -557,7 +557,7 @@ isRelDataFile(const char *path) } else { - nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/%u.%u", + nmatch = sscanf(path, "pg_tblspc/%u/" TABLESPACE_VERSION_DIRECTORY "/%u/" INT64_FORMAT ".%u", &rnode.spcNode, &rnode.dbNode, &rnode.relNode, &segNo); if (nmatch == 3 || nmatch == 4) diff --git a/src/bin/pg_upgrade/info.c b/src/bin/pg_upgrade/info.c index 69ef231..d3c5d53 100644 --- a/src/bin/pg_upgrade/info.c +++ b/src/bin/pg_upgrade/info.c @@ -511,7 +511,7 @@ get_rel_infos(ClusterInfo *cluster, DbInfo *dbinfo) relname = PQgetvalue(res, relnum, i_relname); curr->relname = pg_strdup(relname); - curr->relfilenode = atooid(PQgetvalue(res, relnum, i_relfilenode)); + curr->relfilenode = atorelnode(PQgetvalue(res, relnum, i_relfilenode)); curr->tblsp_alloc = false; /* Is the
tablespace oid non-default? */ diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c index f66bbd5..c8bbedb 100644 --- a/src/bin/pg_upgrade/pg_upgrade.c +++ b/src/bin/pg_upgrade/pg_upgrade.c @@ -15,10 +15,8 @@ * oids are the same between old and new clusters. This is important * because toast oids are stored as toast pointers in user tables. * - * While pg_class.oid and pg_class.relfilenode are initially the same in a - * cluster, they can diverge due to CLUSTER, REINDEX, or VACUUM FULL. We - * control assignments of pg_class.relfilenode because we want the filenames - * to match between the old and new cluster. + * We control assignments of pg_class.relfilenode because we want the + * filenames to match between the old and new cluster. * * We control assignment of pg_tablespace.oid because we want the oid to match * between the old and new cluster. diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h index 0aca0a7..13975b7 100644 --- a/src/bin/pg_upgrade/pg_upgrade.h +++ b/src/bin/pg_upgrade/pg_upgrade.h @@ -130,7 +130,7 @@ typedef struct char *nspname; /* namespace name */ char *relname; /* relation name */ Oid reloid; /* relation OID */ - Oid relfilenode; /* relation file node */ + RelNode relfilenode; /* relation file node */ Oid indtable; /* if index, OID of its table, else 0 */ Oid toastheap; /* if toast table, OID of base table, else 0 */ char *tablespace; /* tablespace path; "" for cluster default */ @@ -154,7 +154,7 @@ typedef struct const char *old_tablespace_suffix; const char *new_tablespace_suffix; Oid db_oid; - Oid relfilenode; + RelNode relfilenode; /* the rest are used only for logging and error reporting */ char *nspname; /* namespaces */ char *relname; diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c index 2f4deb3..10e6a6c 100644 --- a/src/bin/pg_upgrade/relfilenode.c +++ b/src/bin/pg_upgrade/relfilenode.c @@ -190,14 +190,14 @@ transfer_relfile(FileNameMap *map, 
const char *type_suffix, bool vm_must_add_fro else snprintf(extent_suffix, sizeof(extent_suffix), ".%d", segno); - snprintf(old_file, sizeof(old_file), "%s%s/%u/%u%s%s", + snprintf(old_file, sizeof(old_file), "%s%s/%u/" INT64_FORMAT "%s%s", map->old_tablespace, map->old_tablespace_suffix, map->db_oid, map->relfilenode, type_suffix, extent_suffix); - snprintf(new_file, sizeof(new_file), "%s%s/%u/%u%s%s", + snprintf(new_file, sizeof(new_file), "%s%s/%u/" INT64_FORMAT "%s%s", map->new_tablespace, map->new_tablespace_suffix, map->db_oid, diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index a6251e1..54c3da7 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -518,13 +518,13 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record) XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk); if (forknum != MAIN_FORKNUM) - printf(", blkref #%d: rel %u/%u/%u fork %s blk %u", + printf(", blkref #%d: rel %u/%u/" INT64_FORMAT " fork %s blk %u", block_id, rnode.spcNode, rnode.dbNode, rnode.relNode, forkNames[forknum], blk); else - printf(", blkref #%d: rel %u/%u/%u blk %u", + printf(", blkref #%d: rel %u/%u/" INT64_FORMAT " blk %u", block_id, rnode.spcNode, rnode.dbNode, rnode.relNode, blk); @@ -548,7 +548,7 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record) continue; XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk); - printf("\tblkref #%d: rel %u/%u/%u fork %s blk %u", + printf("\tblkref #%d: rel %u/%u/" INT64_FORMAT " fork %s blk %u", block_id, rnode.spcNode, rnode.dbNode, rnode.relNode, forkNames[forknum], diff --git a/src/common/relpath.c b/src/common/relpath.c index 636c96e..27b8547 100644 --- a/src/common/relpath.c +++ b/src/common/relpath.c @@ -138,7 +138,7 @@ GetDatabasePath(Oid dbNode, Oid spcNode) * the trouble considering BackendId is just int anyway.
*/ char * -GetRelationPath(Oid dbNode, Oid spcNode, Oid relNode, +GetRelationPath(Oid dbNode, Oid spcNode, RelNode relNode, int backendId, ForkNumber forkNumber) { char *path; @@ -149,10 +149,10 @@ GetRelationPath(Oid dbNode, Oid spcNode, Oid relNode, Assert(dbNode == 0); Assert(backendId == InvalidBackendId); if (forkNumber != MAIN_FORKNUM) - path = psprintf("global/%u_%s", + path = psprintf("global/" INT64_FORMAT "_%s", relNode, forkNames[forkNumber]); else - path = psprintf("global/%u", relNode); + path = psprintf("global/" INT64_FORMAT, relNode); } else if (spcNode == DEFAULTTABLESPACE_OID) { @@ -160,21 +160,21 @@ GetRelationPath(Oid dbNode, Oid spcNode, Oid relNode, if (backendId == InvalidBackendId) { if (forkNumber != MAIN_FORKNUM) - path = psprintf("base/%u/%u_%s", + path = psprintf("base/%u/" INT64_FORMAT "_%s", dbNode, relNode, forkNames[forkNumber]); else - path = psprintf("base/%u/%u", + path = psprintf("base/%u/" INT64_FORMAT, dbNode, relNode); } else { if (forkNumber != MAIN_FORKNUM) - path = psprintf("base/%u/t%d_%u_%s", + path = psprintf("base/%u/t%d_" INT64_FORMAT "_%s", dbNode, backendId, relNode, forkNames[forkNumber]); else - path = psprintf("base/%u/t%d_%u", + path = psprintf("base/%u/t%d_" INT64_FORMAT, dbNode, backendId, relNode); } } @@ -184,24 +184,24 @@ GetRelationPath(Oid dbNode, Oid spcNode, Oid relNode, if (backendId == InvalidBackendId) { if (forkNumber != MAIN_FORKNUM) - path = psprintf("pg_tblspc/%u/%s/%u/%u_%s", + path = psprintf("pg_tblspc/%u/%s/%u/" INT64_FORMAT "_%s", spcNode, TABLESPACE_VERSION_DIRECTORY, dbNode, relNode, forkNames[forkNumber]); else - path = psprintf("pg_tblspc/%u/%s/%u/%u", + path = psprintf("pg_tblspc/%u/%s/%u/" INT64_FORMAT, spcNode, TABLESPACE_VERSION_DIRECTORY, dbNode, relNode); } else { if (forkNumber != MAIN_FORKNUM) - path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u_%s", + path = psprintf("pg_tblspc/%u/%s/%u/t%d_" INT64_FORMAT "_%s", spcNode, TABLESPACE_VERSION_DIRECTORY, dbNode, backendId, relNode, 
forkNames[forkNumber]); else - path = psprintf("pg_tblspc/%u/%s/%u/t%d_%u", + path = psprintf("pg_tblspc/%u/%s/%u/t%d_" INT64_FORMAT, spcNode, TABLESPACE_VERSION_DIRECTORY, dbNode, backendId, relNode); } diff --git a/src/fe_utils/option_utils.c b/src/fe_utils/option_utils.c index abea881..2cb3370 100644 --- a/src/fe_utils/option_utils.c +++ b/src/fe_utils/option_utils.c @@ -82,3 +82,45 @@ option_parse_int(const char *optarg, const char *optname, *result = val; return true; } + +/* + * option_parse_int64 + * + * Same as option_parse_int but parse int64. + */ +bool +option_parse_int64(const char *optarg, const char *optname, + int64 min_range, int64 max_range, + int64 *result) +{ + char *endptr; + int64 val; + + errno = 0; + val = strtoi64(optarg, &endptr, 10); + + /* + * Skip any trailing whitespace; if anything but whitespace remains before + * the terminating character, fail. + */ + while (*endptr != '\0' && isspace((unsigned char) *endptr)) + endptr++; + + if (*endptr != '\0') + { + pg_log_error("invalid value \"%s\" for option %s", + optarg, optname); + return false; + } + + if (errno == ERANGE || val < min_range || val > max_range) + { + pg_log_error("%s must be in range " INT64_FORMAT ".." INT64_FORMAT, + optname, min_range, max_range); + return false; + } + + if (result) + *result = val; + return true; +} diff --git a/src/include/access/transam.h b/src/include/access/transam.h index 9a2816d..8113335 100644 --- a/src/include/access/transam.h +++ b/src/include/access/transam.h @@ -217,6 +217,9 @@ typedef struct VariableCacheData */ Oid nextOid; /* next OID to assign */ uint32 oidCount; /* OIDs available before must do XLOG work */ + RelNode nextRelNode; /* next relfilenode to assign */ + uint32 relnodecount; /* Relfilenode available before must do XLOG + work */ /* * These fields are protected by XidGenLock. 
@@ -298,6 +301,7 @@ extern void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, extern void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid); extern bool ForceTransactionIdLimitUpdate(void); extern Oid GetNewObjectId(void); +extern RelNode GetNewRelNode(void); extern void StopGeneratingPinnedObjectIds(void); #ifdef USE_ASSERT_CHECKING diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 4b45ac6..cd5ab2d 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -233,6 +233,7 @@ extern bool CreateRestartPoint(int flags); extern WALAvailability GetWALAvailability(XLogRecPtr targetLSN); extern XLogRecPtr CalculateMaxmumSafeLSN(void); extern void XLogPutNextOid(Oid nextOid); +extern void XLogPutNextRelFileNode(RelNode nextrelnode); extern XLogRecPtr XLogRestorePoint(const char *rpName); extern void UpdateFullPageWrites(void); extern void GetFullPageWriteInfo(XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p); diff --git a/src/include/catalog/binary_upgrade.h b/src/include/catalog/binary_upgrade.h index 0b6944b..401bfa2 100644 --- a/src/include/catalog/binary_upgrade.h +++ b/src/include/catalog/binary_upgrade.h @@ -22,11 +22,11 @@ extern PGDLLIMPORT Oid binary_upgrade_next_mrng_pg_type_oid; extern PGDLLIMPORT Oid binary_upgrade_next_mrng_array_pg_type_oid; extern PGDLLIMPORT Oid binary_upgrade_next_heap_pg_class_oid; -extern PGDLLIMPORT Oid binary_upgrade_next_heap_pg_class_relfilenode; +extern PGDLLIMPORT RelNode binary_upgrade_next_heap_pg_class_relfilenode; extern PGDLLIMPORT Oid binary_upgrade_next_index_pg_class_oid; -extern PGDLLIMPORT Oid binary_upgrade_next_index_pg_class_relfilenode; +extern PGDLLIMPORT RelNode binary_upgrade_next_index_pg_class_relfilenode; extern PGDLLIMPORT Oid binary_upgrade_next_toast_pg_class_oid; -extern PGDLLIMPORT Oid binary_upgrade_next_toast_pg_class_relfilenode; +extern PGDLLIMPORT RelNode binary_upgrade_next_toast_pg_class_relfilenode; extern PGDLLIMPORT Oid 
binary_upgrade_next_pg_enum_oid; extern PGDLLIMPORT Oid binary_upgrade_next_pg_authid_oid; diff --git a/src/include/catalog/catalog.h b/src/include/catalog/catalog.h index 60c1215..1b83c79 100644 --- a/src/include/catalog/catalog.h +++ b/src/include/catalog/catalog.h @@ -15,6 +15,7 @@ #define CATALOG_H #include "catalog/pg_class.h" +#include "storage/relfilenode.h" #include "utils/relcache.h" @@ -38,7 +39,6 @@ extern bool IsPinnedObject(Oid classId, Oid objectId); extern Oid GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn); -extern Oid GetNewRelFileNode(Oid reltablespace, Relation pg_class, - char relpersistence); +extern RelNode GetNewRelFileNode(Oid reltablespace, char relpersistence); #endif /* CATALOG_H */ diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index c4757bd..66d41af 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -50,7 +50,7 @@ extern Relation heap_create(const char *relname, Oid relnamespace, Oid reltablespace, Oid relid, - Oid relfilenode, + RelNode relfilenode, Oid accessmtd, TupleDesc tupDesc, char relkind, diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index a1d6e3b..1e79ec9 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -71,7 +71,7 @@ extern Oid index_create(Relation heapRelation, Oid indexRelationId, Oid parentIndexRelid, Oid parentConstraintId, - Oid relFileNode, + RelNode relFileNode, IndexInfo *indexInfo, List *indexColNames, Oid accessMethodObjectId, diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 304e8c1..4659ed3 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -52,13 +52,13 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat /* access method; 0 if not a table / index */ Oid relam BKI_DEFAULT(heap) BKI_LOOKUP_OPT(pg_am); - /* identifier of physical storage file */ - /* relfilenode == 0 means it is a "mapped" 
relation, see relmapper.c */ - Oid relfilenode BKI_DEFAULT(0); - /* identifier of table space for relation (0 means default for database) */ Oid reltablespace BKI_DEFAULT(0) BKI_LOOKUP_OPT(pg_tablespace); + /* identifier of physical storage file */ + /* relfilenode == 0 means it is a "mapped" relation, see relmapper.c */ + int64 relfilenode BKI_DEFAULT(0); + /* # of blocks (not always up-to-date) */ int32 relpages BKI_DEFAULT(0); @@ -154,7 +154,7 @@ typedef FormData_pg_class *Form_pg_class; DECLARE_UNIQUE_INDEX_PKEY(pg_class_oid_index, 2662, ClassOidIndexId, on pg_class using btree(oid oid_ops)); DECLARE_UNIQUE_INDEX(pg_class_relname_nsp_index, 2663, ClassNameNspIndexId, on pg_class using btree(relname name_ops, relnamespace oid_ops)); -DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, ClassTblspcRelfilenodeIndexId, on pg_class using btree(reltablespace oid_ops, relfilenode oid_ops)); +DECLARE_INDEX(pg_class_tblspc_relfilenode_index, 3455, ClassTblspcRelfilenodeIndexId, on pg_class using btree(reltablespace oid_ops, relfilenode int8_ops)); #ifdef EXPOSE_TO_CLIENT_CODE diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 1f3dc24..27d584d 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -41,6 +41,7 @@ typedef struct CheckPoint * timeline (equals ThisTimeLineID otherwise) */ bool fullPageWrites; /* current full_page_writes */ FullTransactionId nextXid; /* next free transaction ID */ + RelNode nextRelNode; /* next relfile node */ Oid nextOid; /* next free OID */ MultiXactId nextMulti; /* next free MultiXactId */ MultiXactOffset nextMultiOffset; /* next free MultiXact offset */ @@ -78,6 +79,7 @@ typedef struct CheckPoint #define XLOG_FPI 0xB0 /* 0xC0 is used in Postgres 9.5-11 */ #define XLOG_OVERWRITE_CONTRECORD 0xD0 +#define XLOG_NEXT_RELFILENODE 0xE0 /* diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 7f1ee97..c0f0d74 100644 --- 
a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -7287,11 +7287,11 @@ proname => 'pg_indexes_size', provolatile => 'v', prorettype => 'int8', proargtypes => 'regclass', prosrc => 'pg_indexes_size' }, { oid => '2999', descr => 'filenode identifier of relation', - proname => 'pg_relation_filenode', provolatile => 's', prorettype => 'oid', + proname => 'pg_relation_filenode', provolatile => 's', prorettype => 'int8', proargtypes => 'regclass', prosrc => 'pg_relation_filenode' }, { oid => '3454', descr => 'relation OID for filenode and tablespace', proname => 'pg_filenode_relation', provolatile => 's', - prorettype => 'regclass', proargtypes => 'oid oid', + prorettype => 'regclass', proargtypes => 'oid int8', prosrc => 'pg_filenode_relation' }, { oid => '3034', descr => 'file path of relation', proname => 'pg_relation_filepath', provolatile => 's', prorettype => 'text', @@ -11059,15 +11059,15 @@ prosrc => 'binary_upgrade_set_missing_value' }, { oid => '4545', descr => 'for use by pg_upgrade', proname => 'binary_upgrade_set_next_heap_relfilenode', provolatile => 'v', - proparallel => 'u', prorettype => 'void', proargtypes => 'oid', + proparallel => 'u', prorettype => 'void', proargtypes => 'int8', prosrc => 'binary_upgrade_set_next_heap_relfilenode' }, { oid => '4546', descr => 'for use by pg_upgrade', proname => 'binary_upgrade_set_next_index_relfilenode', provolatile => 'v', - proparallel => 'u', prorettype => 'void', proargtypes => 'oid', + proparallel => 'u', prorettype => 'void', proargtypes => 'int8', prosrc => 'binary_upgrade_set_next_index_relfilenode' }, { oid => '4547', descr => 'for use by pg_upgrade', proname => 'binary_upgrade_set_next_toast_relfilenode', provolatile => 'v', - proparallel => 'u', prorettype => 'void', proargtypes => 'oid', + proparallel => 'u', prorettype => 'void', proargtypes => 'int8', prosrc => 'binary_upgrade_set_next_toast_relfilenode' }, { oid => '4548', descr => 'for use by pg_upgrade', proname => 
'binary_upgrade_set_next_pg_tablespace_oid', provolatile => 'v', diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h index 5d4037f..297c20b 100644 --- a/src/include/commands/tablecmds.h +++ b/src/include/commands/tablecmds.h @@ -66,7 +66,7 @@ extern void SetRelationHasSubclass(Oid relationId, bool relhassubclass); extern bool CheckRelationTableSpaceMove(Relation rel, Oid newTableSpaceId); extern void SetRelationTableSpace(Relation rel, Oid newTableSpaceId, - Oid newRelFileNode); + RelNode newRelFileNode); extern ObjectAddress renameatt(RenameStmt *stmt); diff --git a/src/include/common/relpath.h b/src/include/common/relpath.h index a4b5dc8..d6d6215 100644 --- a/src/include/common/relpath.h +++ b/src/include/common/relpath.h @@ -66,7 +66,7 @@ extern int forkname_chars(const char *str, ForkNumber *fork); */ extern char *GetDatabasePath(Oid dbNode, Oid spcNode); -extern char *GetRelationPath(Oid dbNode, Oid spcNode, Oid relNode, +extern char *GetRelationPath(Oid dbNode, Oid spcNode, RelNode relNode, int backendId, ForkNumber forkNumber); /* diff --git a/src/include/fe_utils/option_utils.h b/src/include/fe_utils/option_utils.h index 03c09fd..8c0e818 100644 --- a/src/include/fe_utils/option_utils.h +++ b/src/include/fe_utils/option_utils.h @@ -22,5 +22,8 @@ extern void handle_help_version_opts(int argc, char *argv[], extern bool option_parse_int(const char *optarg, const char *optname, int min_range, int max_range, int *result); +extern bool option_parse_int64(const char *optarg, const char *optname, + int64 min_range, int64 max_range, + int64 *result); #endif /* OPTION_UTILS_H */ diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 34218b7..6f7bd0f 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -2901,7 +2901,7 @@ typedef struct IndexStmt List *excludeOpNames; /* exclusion operator names, or NIL if none */ char *idxcomment; /* comment to apply to index, or NULL */ Oid 
indexOid; /* OID of an existing index, if any */ - Oid oldNode; /* relfilenode of existing storage, if any */ + RelNode oldNode; /* relfilenode of existing storage, if any */ SubTransactionId oldCreateSubid; /* rd_createSubid of oldNode */ SubTransactionId oldFirstRelfilenodeSubid; /* rd_firstRelfilenodeSubid of * oldNode */ diff --git a/src/include/postgres_ext.h b/src/include/postgres_ext.h index fdb61b7..7454933 100644 --- a/src/include/postgres_ext.h +++ b/src/include/postgres_ext.h @@ -46,6 +46,21 @@ typedef unsigned int Oid; /* Define a signed 64-bit integer type for use in client API declarations. */ typedef PG_INT64_TYPE pg_int64; +/* + * RelNode data type identifies the specific relation file name. RelNode is + * unique within a cluster. + * + * XXX ideally we would use uint64, but currently we only have int8 as an exposed + * datatype, so maybe we should make a new datatype relnode which will be an + * 8-byte unsigned integer. + */ +typedef pg_int64 RelNode; + +#define atorelnode(x) ((RelNode) strtoul((x), NULL, 10)) + +#define InvalidRelNode ((RelNode) 0) +#define FirstNormalRelNode ((RelNode) 1) +#define RelNodeIsValid(relNode) ((bool) ((relNode) != InvalidRelNode)) /* * Identifiers of error message fields. Kept here to keep common diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 0286d51..6e940a6 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -21,6 +21,7 @@ #include "storage/condition_variable.h" #include "storage/latch.h" #include "storage/lwlock.h" +#include "storage/relfilenode.h" #include "storage/shmem.h" #include "storage/smgr.h" #include "storage/spin.h" @@ -92,8 +93,9 @@ typedef struct buftag { Oid spcOid; /* tablespace oid. */ Oid dbOid; /* database oid. */ - Oid fileNode; /* relation file node. 
*/ - ForkNumber forkNum; + uint32 fileNode_low; /* relation file node 32 lower bits */ + uint32 fileNode_hi:24; /* relation file node 24 high bits */ + uint32 forkNum:8; BlockNumber blockNum; /* blknum relative to begin of reln */ } BufferTag; @@ -101,7 +103,8 @@ typedef struct buftag ( \ (a).spcOid = InvalidOid, \ (a).dbOid = InvalidOid, \ - (a).fileNode = InvalidOid, \ + (a).fileNode_low = 0, \ + (a).fileNode_hi = 0, \ (a).forkNum = InvalidForkNumber, \ (a).blockNum = InvalidBlockNumber \ ) @@ -110,7 +113,7 @@ typedef struct buftag ( \ (a).spcOid = (xx_rnode).spcNode, \ (a).dbOid = (xx_rnode).dbNode, \ - (a).fileNode = (xx_rnode).relNode, \ + BufTagSetFileNode(a, (xx_rnode).relNode), \ (a).forkNum = (xx_forkNum), \ (a).blockNum = (xx_blockNum) \ ) @@ -119,23 +122,33 @@ typedef struct buftag ( \ (a).spcOid == (b).spcOid && \ (a).dbOid == (b).dbOid && \ - (a).fileNode == (b).fileNode && \ + (a).fileNode_low == (b).fileNode_low && \ + (a).fileNode_hi == (b).fileNode_hi && \ (a).blockNum == (b).blockNum && \ (a).forkNum == (b).forkNum \ ) +#define BufTagGetFileNode(a) \ + ((((uint64) (a).fileNode_hi << 32) | ((uint32) (a).fileNode_low))) + +#define BufTagSetFileNode(a, node) \ +( \ + (a).fileNode_hi = (node) >> 32, \ + (a).fileNode_low = (node) & 0xffffffff \ +) + #define BuffTagGetRelFileNode(a, node) \ do { \ (node).spcNode = (a).spcOid; \ (node).dbNode = (a).dbOid; \ - (node).relNode = (a).fileNode; \ + (node).relNode = BufTagGetFileNode(a); \ } while(0) #define BuffTagRelFileNodeEquals(a, node) \ ( \ (a).spcOid == (node).spcNode && \ (a).dbOid == (node).dbNode && \ - (a).fileNode == (node).relNode \ + BufTagGetFileNode(a) == (node).relNode \ ) /* @@ -312,7 +325,7 @@ extern BufferDesc *LocalBufferDescriptors; typedef struct CkptSortItem { Oid tsId; - Oid relNode; + RelNode relNode; ForkNumber forkNum; BlockNumber blockNum; int buf_id; diff --git a/src/include/storage/relfilenode.h b/src/include/storage/relfilenode.h index 4fdc606..cd2110c 100644 --- 
a/src/include/storage/relfilenode.h +++ b/src/include/storage/relfilenode.h @@ -34,8 +34,7 @@ * relNode identifies the specific relation. relNode corresponds to * pg_class.relfilenode (NOT pg_class.oid, because we need to be able * to assign new physical files to relations in some situations). - * Notice that relNode is only unique within a database in a particular - * tablespace. + * Notice that relNode is unique within a cluster. * * Note: spcNode must be GLOBALTABLESPACE_OID if and only if dbNode is * zero. We support shared relations only in the "global" tablespace. @@ -58,7 +57,7 @@ typedef struct RelFileNode { Oid spcNode; /* tablespace */ Oid dbNode; /* database */ - Oid relNode; /* relation */ + RelNode relNode; /* relation */ } RelFileNode; /* @@ -75,6 +74,15 @@ typedef struct RelFileNodeBackend BackendId backend; } RelFileNodeBackend; +#define SizeOfRelFileNodeBackend \ + (offsetof(RelFileNodeBackend, backend) + sizeof(BackendId)) + +/* + * Max value of the relfilenode. The relfilenode is 56 bits wide; for more + * details refer to the comments atop BufferTag. 
+ */ +#define MAX_RELFILENODE ((((uint64) 1) << 56) - 1) + #define RelFileNodeBackendIsTemp(rnode) \ ((rnode).backend != InvalidBackendId) diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 6da1b22..a47ede3 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -526,7 +526,7 @@ typedef struct ViewOptions */ #define RelationIsMapped(relation) \ (RELKIND_HAS_STORAGE((relation)->rd_rel->relkind) && \ - ((relation)->rd_rel->relfilenode == InvalidOid)) + ((relation)->rd_rel->relfilenode == InvalidRelNode)) /* * RelationGetSmgr diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 84d6afe..5d13660 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -102,7 +102,7 @@ extern Relation RelationBuildLocalRelation(const char *relname, TupleDesc tupDesc, Oid relid, Oid accessmtd, - Oid relfilenode, + RelNode relfilenode, Oid reltablespace, bool shared_relation, bool mapped_relation, diff --git a/src/include/utils/relfilenodemap.h b/src/include/utils/relfilenodemap.h index 77d8046..d324981 100644 --- a/src/include/utils/relfilenodemap.h +++ b/src/include/utils/relfilenodemap.h @@ -13,6 +13,6 @@ #ifndef RELFILENODEMAP_H #define RELFILENODEMAP_H -extern Oid RelidByRelfilenode(Oid reltablespace, Oid relfilenode); +extern Oid RelidByRelfilenode(Oid reltablespace, RelNode relfilenode); #endif /* RELFILENODEMAP_H */ diff --git a/src/include/utils/relmapper.h b/src/include/utils/relmapper.h index 9fbb5a7..58234a8 100644 --- a/src/include/utils/relmapper.h +++ b/src/include/utils/relmapper.h @@ -35,11 +35,11 @@ typedef struct xl_relmap_update #define MinSizeOfRelmapUpdate offsetof(xl_relmap_update, data) -extern Oid RelationMapOidToFilenode(Oid relationId, bool shared); +extern RelNode RelationMapOidToFilenode(Oid relationId, bool shared); -extern Oid RelationMapFilenodeToOid(Oid relationId, bool shared); +extern Oid RelationMapFilenodeToOid(RelNode relationId, bool shared); -extern void 
RelationMapUpdateMap(Oid relationId, Oid fileNode, bool shared, +extern void RelationMapUpdateMap(Oid relationId, RelNode fileNode, bool shared, bool immediate); extern void RelationMapRemoveMapping(Oid relationId); diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index 16e0475..58aeddb 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out @@ -2164,7 +2164,6 @@ select relname, c.oid = oldoid as orig_oid, case relfilenode when 0 then 'none' - when c.oid then 'own' when oldfilenode then 'orig' else 'OTHER' end as storage, @@ -2175,10 +2174,10 @@ select relname, relname | orig_oid | storage | desc ------------------------------+----------+---------+--------------- at_partitioned | t | none | - at_partitioned_0 | t | own | - at_partitioned_0_id_name_key | t | own | child 0 index - at_partitioned_1 | t | own | - at_partitioned_1_id_name_key | t | own | child 1 index + at_partitioned_0 | t | orig | + at_partitioned_0_id_name_key | t | orig | child 0 index + at_partitioned_1 | t | orig | + at_partitioned_1_id_name_key | t | orig | child 1 index at_partitioned_id_name_key | t | none | parent index (6 rows) @@ -2198,7 +2197,6 @@ select relname, c.oid = oldoid as orig_oid, case relfilenode when 0 then 'none' - when c.oid then 'own' when oldfilenode then 'orig' else 'OTHER' end as storage, @@ -2209,10 +2207,10 @@ select relname, relname | orig_oid | storage | desc ------------------------------+----------+---------+-------------- at_partitioned | t | none | - at_partitioned_0 | t | own | - at_partitioned_0_id_name_key | f | own | parent index - at_partitioned_1 | t | own | - at_partitioned_1_id_name_key | f | own | parent index + at_partitioned_0 | t | orig | + at_partitioned_0_id_name_key | f | OTHER | parent index + at_partitioned_1 | t | orig | + at_partitioned_1_id_name_key | f | OTHER | parent index at_partitioned_id_name_key | f | none | parent index (6 rows) @@ -2556,7 
+2554,7 @@ CREATE FUNCTION check_ddl_rewrite(p_tablename regclass, p_ddl text) RETURNS boolean LANGUAGE plpgsql AS $$ DECLARE - v_relfilenode oid; + v_relfilenode int8; BEGIN v_relfilenode := relfilenode FROM pg_class WHERE oid = p_tablename; diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql index ac894c0..250e6cd 100644 --- a/src/test/regress/sql/alter_table.sql +++ b/src/test/regress/sql/alter_table.sql @@ -1478,7 +1478,6 @@ select relname, c.oid = oldoid as orig_oid, case relfilenode when 0 then 'none' - when c.oid then 'own' when oldfilenode then 'orig' else 'OTHER' end as storage, @@ -1499,7 +1498,6 @@ select relname, c.oid = oldoid as orig_oid, case relfilenode when 0 then 'none' - when c.oid then 'own' when oldfilenode then 'orig' else 'OTHER' end as storage, @@ -1638,7 +1636,7 @@ CREATE FUNCTION check_ddl_rewrite(p_tablename regclass, p_ddl text) RETURNS boolean LANGUAGE plpgsql AS $$ DECLARE - v_relfilenode oid; + v_relfilenode int8; BEGIN v_relfilenode := relfilenode FROM pg_class WHERE oid = p_tablename; -- 1.8.3.1