From d8489cf06b9cd186f5dac801879e604bb330f79a Mon Sep 17 00:00:00 2001 From: kommih Date: Mon, 8 Oct 2018 14:33:49 +1100 Subject: [PATCH 1/2] New API setNewfilenode This API can be used to set the filenode of a relation. The wrapper function for this API is table_set_new_filenode; it is used to create new storage for sequences and indexes as well as tables. The wrapper function name can be updated if required. --- src/backend/access/heap/heapam_handler.c | 128 ++++++++++++++++++++- src/backend/catalog/index.c | 2 +- src/backend/commands/sequence.c | 5 +- src/backend/commands/tablecmds.c | 6 +- src/backend/utils/cache/relcache.c | 135 ++--------------------- src/include/access/tableam.h | 13 +++ src/include/utils/relcache.h | 9 +- 7 files changed, 157 insertions(+), 141 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index c3960dc91f..313ed319fc 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -22,6 +22,7 @@ #include "miscadmin.h" +#include "access/multixact.h" #include "access/heapam.h" #include "access/relscan.h" #include "access/rewriteheap.h" @@ -29,12 +30,17 @@ #include "access/tsmapi.h" #include "catalog/catalog.h" #include "catalog/index.h" +#include "catalog/indexing.h" #include "catalog/pg_am_d.h" +#include "catalog/storage.h" #include "executor/executor.h" #include "pgstat.h" #include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/rel.h" +#include "utils/relcache.h" +#include "utils/relmapper.h" +#include "utils/syscache.h" #include "utils/tqual.h" #include "storage/bufpage.h" #include "storage/bufmgr.h" @@ -2116,6 +2122,124 @@ heap_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex, pfree(isnull); } +/* + * RelationSetNewRelfilenode + * + * Assign a new relfilenode (physical file name) to the relation.
+ * + * This allows a full rewrite of the relation to be done with transactional + * safety (since the filenode assignment can be rolled back). Note however + * that there is no simple way to access the relation's old data for the + * remainder of the current transaction. This limits the usefulness to cases + * such as TRUNCATE or rebuilding an index from scratch. + * + * Caller must already hold exclusive lock on the relation. + * + * The relation is marked with relfrozenxid = freezeXid (InvalidTransactionId + * must be passed for indexes and sequences). This should be a lower bound on + * the XIDs that will be put into the new relation contents. + * + * The new filenode's persistence is set to the given value. This is useful + * for the cases that are changing the relation's persistence; other callers + * need to pass the original relpersistence value. + */ +static void +RelationSetNewRelfilenode(Relation relation, char persistence, + TransactionId freezeXid, MultiXactId minmulti) +{ + Oid newrelfilenode; + RelFileNodeBackend newrnode; + Relation pg_class; + HeapTuple tuple; + Form_pg_class classform; + + /* Indexes, sequences must have Invalid frozenxid; other rels must not */ + Assert((relation->rd_rel->relkind == RELKIND_INDEX || + relation->rd_rel->relkind == RELKIND_SEQUENCE) ? + freezeXid == InvalidTransactionId : + TransactionIdIsNormal(freezeXid)); + Assert(TransactionIdIsNormal(freezeXid) == MultiXactIdIsValid(minmulti)); + + /* Allocate a new relfilenode */ + newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL, + persistence); + + /* + * Get a writable copy of the pg_class tuple for the given relation. 
+ */ + pg_class = heap_open(RelationRelationId, RowExclusiveLock); + + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(relation))); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "could not find tuple for relation %u", + RelationGetRelid(relation)); + classform = (Form_pg_class) GETSTRUCT(tuple); + + /* + * Create storage for the main fork of the new relfilenode. + * + * NOTE: any conflict in relfilenode value will be caught here, if + * GetNewRelFileNode messes up for any reason. + */ + newrnode.node = relation->rd_node; + newrnode.node.relNode = newrelfilenode; + newrnode.backend = relation->rd_backend; + RelationCreateStorage(newrnode.node, persistence); + smgrclosenode(newrnode); + + /* + * Schedule unlinking of the old storage at transaction commit. + */ + RelationDropStorage(relation); + + /* + * Now update the pg_class row. However, if we're dealing with a mapped + * index, pg_class.relfilenode doesn't change; instead we have to send the + * update to the relation mapper. + */ + if (RelationIsMapped(relation)) + RelationMapUpdateMap(RelationGetRelid(relation), + newrelfilenode, + relation->rd_rel->relisshared, + false); + else + classform->relfilenode = newrelfilenode; + + /* These changes are safe even for a mapped relation */ + if (relation->rd_rel->relkind != RELKIND_SEQUENCE) + { + classform->relpages = 0; /* it's empty until further notice */ + classform->reltuples = 0; + classform->relallvisible = 0; + } + classform->relfrozenxid = freezeXid; + classform->relminmxid = minmulti; + classform->relpersistence = persistence; + + CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); + + heap_freetuple(tuple); + + heap_close(pg_class, RowExclusiveLock); + + /* + * Make the pg_class row change visible, as well as the relation map + * change if any. This will cause the relcache entry to get updated, too. + */ + CommandCounterIncrement(); + + /* + * Mark the rel as having been given a new relfilenode in the current + * (sub) transaction. 
This is a hint that can be used to optimize later + * operations on the rel in the same transaction. + */ + relation->rd_newRelfilenodeSubid = GetCurrentSubTransactionId(); + + /* Flag relation as needing eoxact cleanup (to remove the hint) */ + EOXactListAdd(relation); +} + static const TableAmRoutine heapam_methods = { .type = T_TableAmRoutine, @@ -2163,7 +2287,9 @@ static const TableAmRoutine heapam_methods = { .index_build_range_scan = IndexBuildHeapRangeScan, - .index_validate_scan = validate_index_heapscan + .index_validate_scan = validate_index_heapscan, + + .SetNewFileNode = RelationSetNewRelfilenode }; const TableAmRoutine * diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 55477bd995..df213dc07d 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -2865,7 +2865,7 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, } /* We'll build a new physical relation for the index */ - RelationSetNewRelfilenode(iRel, persistence, InvalidTransactionId, + table_set_new_filenode(iRel, persistence, InvalidTransactionId, InvalidMultiXactId); /* Initialize the index and rebuild */ diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 89122d4ad7..107f9a0176 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -17,6 +17,7 @@ #include "access/bufmask.h" #include "access/htup_details.h" #include "access/multixact.h" +#include "access/tableam.h" #include "access/transam.h" #include "access/xact.h" #include "access/xlog.h" @@ -315,7 +316,7 @@ ResetSequence(Oid seq_relid) * sequence's relfrozenxid at 0, since it won't contain any unfrozen XIDs. * Same with relminmxid, since a sequence will never contain multixacts. 
*/ - RelationSetNewRelfilenode(seq_rel, seq_rel->rd_rel->relpersistence, + table_set_new_filenode(seq_rel, seq_rel->rd_rel->relpersistence, InvalidTransactionId, InvalidMultiXactId); /* @@ -485,7 +486,7 @@ AlterSequence(ParseState *pstate, AlterSeqStmt *stmt) * at 0, since it won't contain any unfrozen XIDs. Same with * relminmxid, since a sequence will never contain multixacts. */ - RelationSetNewRelfilenode(seqrel, seqrel->rd_rel->relpersistence, + table_set_new_filenode(seqrel, seqrel->rd_rel->relpersistence, InvalidTransactionId, InvalidMultiXactId); /* diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index f3526b267d..6db214309e 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -1643,10 +1643,8 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, * Create a new empty storage file for the relation, and assign it * as the relfilenode value. The old storage file is scheduled for * deletion at commit. 
- * - * PBORKED: needs to be a callback */ - RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence, + table_set_new_filenode(rel, rel->rd_rel->relpersistence, RecentXmin, minmulti); if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) heap_create_init_fork(rel); @@ -1660,7 +1658,7 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, List *relids_logged, if (OidIsValid(toast_relid)) { rel = relation_open(toast_relid, AccessExclusiveLock); - RelationSetNewRelfilenode(rel, rel->rd_rel->relpersistence, + table_set_new_filenode(rel, rel->rd_rel->relpersistence, RecentXmin, minmulti); if (rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED) heap_create_init_fork(rel); diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 0d6e5a189f..0592fdc750 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -160,13 +160,6 @@ static Oid eoxact_list[MAX_EOXACT_LIST]; static int eoxact_list_len = 0; static bool eoxact_list_overflowed = false; -#define EOXactListAdd(rel) \ - do { \ - if (eoxact_list_len < MAX_EOXACT_LIST) \ - eoxact_list[eoxact_list_len++] = (rel)->rd_id; \ - else \ - eoxact_list_overflowed = true; \ - } while (0) /* * EOXactTupleDescArray stores TupleDescs that (might) need AtEOXact @@ -292,6 +285,14 @@ static void unlink_initfile(const char *initfilename, int elevel); static bool equalPartitionDescs(PartitionKey key, PartitionDesc partdesc1, PartitionDesc partdesc2); +void +EOXactListAdd(Relation rel) +{ + if (eoxact_list_len < MAX_EOXACT_LIST) + eoxact_list[eoxact_list_len++] = (rel)->rd_id; + else + eoxact_list_overflowed = true; +} /* * ScanPgRelation @@ -3392,126 +3393,6 @@ RelationBuildLocalRelation(const char *relname, return rel; } - -/* - * RelationSetNewRelfilenode - * - * Assign a new relfilenode (physical file name) to the relation. 
- * - * This allows a full rewrite of the relation to be done with transactional - * safety (since the filenode assignment can be rolled back). Note however - * that there is no simple way to access the relation's old data for the - * remainder of the current transaction. This limits the usefulness to cases - * such as TRUNCATE or rebuilding an index from scratch. - * - * Caller must already hold exclusive lock on the relation. - * - * The relation is marked with relfrozenxid = freezeXid (InvalidTransactionId - * must be passed for indexes and sequences). This should be a lower bound on - * the XIDs that will be put into the new relation contents. - * - * The new filenode's persistence is set to the given value. This is useful - * for the cases that are changing the relation's persistence; other callers - * need to pass the original relpersistence value. - */ -void -RelationSetNewRelfilenode(Relation relation, char persistence, - TransactionId freezeXid, MultiXactId minmulti) -{ - Oid newrelfilenode; - RelFileNodeBackend newrnode; - Relation pg_class; - HeapTuple tuple; - Form_pg_class classform; - - /* Indexes, sequences must have Invalid frozenxid; other rels must not */ - Assert((relation->rd_rel->relkind == RELKIND_INDEX || - relation->rd_rel->relkind == RELKIND_SEQUENCE) ? - freezeXid == InvalidTransactionId : - TransactionIdIsNormal(freezeXid)); - Assert(TransactionIdIsNormal(freezeXid) == MultiXactIdIsValid(minmulti)); - - /* Allocate a new relfilenode */ - newrelfilenode = GetNewRelFileNode(relation->rd_rel->reltablespace, NULL, - persistence); - - /* - * Get a writable copy of the pg_class tuple for the given relation. 
- */ - pg_class = heap_open(RelationRelationId, RowExclusiveLock); - - tuple = SearchSysCacheCopy1(RELOID, - ObjectIdGetDatum(RelationGetRelid(relation))); - if (!HeapTupleIsValid(tuple)) - elog(ERROR, "could not find tuple for relation %u", - RelationGetRelid(relation)); - classform = (Form_pg_class) GETSTRUCT(tuple); - - /* - * Create storage for the main fork of the new relfilenode. - * - * NOTE: any conflict in relfilenode value will be caught here, if - * GetNewRelFileNode messes up for any reason. - */ - newrnode.node = relation->rd_node; - newrnode.node.relNode = newrelfilenode; - newrnode.backend = relation->rd_backend; - RelationCreateStorage(newrnode.node, persistence); - smgrclosenode(newrnode); - - /* - * Schedule unlinking of the old storage at transaction commit. - */ - RelationDropStorage(relation); - - /* - * Now update the pg_class row. However, if we're dealing with a mapped - * index, pg_class.relfilenode doesn't change; instead we have to send the - * update to the relation mapper. - */ - if (RelationIsMapped(relation)) - RelationMapUpdateMap(RelationGetRelid(relation), - newrelfilenode, - relation->rd_rel->relisshared, - false); - else - classform->relfilenode = newrelfilenode; - - /* These changes are safe even for a mapped relation */ - if (relation->rd_rel->relkind != RELKIND_SEQUENCE) - { - classform->relpages = 0; /* it's empty until further notice */ - classform->reltuples = 0; - classform->relallvisible = 0; - } - classform->relfrozenxid = freezeXid; - classform->relminmxid = minmulti; - classform->relpersistence = persistence; - - CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); - - heap_freetuple(tuple); - - heap_close(pg_class, RowExclusiveLock); - - /* - * Make the pg_class row change visible, as well as the relation map - * change if any. This will cause the relcache entry to get updated, too. - */ - CommandCounterIncrement(); - - /* - * Mark the rel as having been given a new relfilenode in the current - * (sub) transaction. 
This is a hint that can be used to optimize later - * operations on the rel in the same transaction. - */ - relation->rd_newRelfilenodeSubid = GetCurrentSubTransactionId(); - - /* Flag relation as needing eoxact cleanup (to remove the hint) */ - EOXactListAdd(relation); -} - - /* * RelationCacheInitialize * diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 7fe6ff6c22..4d5b11c294 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -194,6 +194,9 @@ struct SampleScanState; typedef bool (*SampleScanNextBlock_function)(TableScanDesc scan, struct SampleScanState *scanstate); typedef bool (*SampleScanNextTuple_function)(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot); +typedef void (*SetNewFileNode_function)(Relation relation, char persistence, + TransactionId freezeXid, MultiXactId minmulti); + /* * API struct for a table AM. Note this must be allocated in a * server-lifetime manner, typically as a static const struct. 
@@ -250,6 +253,8 @@ typedef struct TableAmRoutine IndexBuildRangeScan_function index_build_range_scan; IndexValidateScan_function index_validate_scan; + + SetNewFileNode_function SetNewFileNode; } TableAmRoutine; static inline const TupleTableSlotOps* @@ -741,6 +746,14 @@ table_index_build_range_scan(Relation heapRelation, scan); } +static inline void +table_set_new_filenode(Relation relation, char persistence, + TransactionId freezeXid, MultiXactId minmulti) +{ + relation->rd_tableamroutine->SetNewFileNode(relation, persistence, + freezeXid, minmulti); +} + extern BlockNumber table_parallelscan_nextpage(TableScanDesc scan); extern void table_parallelscan_startblock_init(TableScanDesc scan); extern Size table_parallelscan_estimate(Snapshot snapshot); diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h index 858a7b30d2..1482dae904 100644 --- a/src/include/utils/relcache.h +++ b/src/include/utils/relcache.h @@ -33,6 +33,9 @@ typedef struct RelationData *Relation; */ typedef Relation *RelationPtr; +/* Add rel's OID to the list of relations needing end-of-transaction cleanup */ +extern void EOXactListAdd(Relation rel); + /* * Routines to open (lookup) and close a relcache entry */ @@ -109,12 +112,6 @@ extern Relation RelationBuildLocalRelation(const char *relname, char relpersistence, char relkind); -/* - * Routine to manage assignment of new relfilenode to a relation - */ -extern void RelationSetNewRelfilenode(Relation relation, char persistence, - TransactionId freezeXid, MultiXactId minmulti); - /* * Routines for flushing/rebuilding relcache entries in various scenarios */ -- 2.18.0.windows.1