From 58e7cba46540340bb598661fb023e9ad8192518a Mon Sep 17 00:00:00 2001 From: Dilip Kumar Date: Thu, 15 May 2025 16:59:31 +0530 Subject: [PATCH v1 2/4] Provide Support for creating global indexes and other DDL on global indexes Syntax: CREATE INDEX name ON table (column_list) GLOBAL; As described in the commit message of the previous patch, TIDs alone are insufficient for uniquely identifying tuples in a global index, because a global index contains tuples from multiple partitions. To uniquely identify heap tuples, we append a partitionID. For detailed information, refer to the commit message of patch 0001. In this design, the partitionID is included as the last key column of the index. The rationale behind storing it as the last key column is that in various scenarios, this column is treated as an extended index key column. For example, in B-tree indexes, each index tuple must be uniquely identified. If we encounter duplicate keys, we use heap TID as a tiebreaker. However, for global indexes, relying solely on heap TID isn't adequate; we also require the partition identifier. Including this as an additional key column simplifies the process, as index tuples are arranged in key order, and this column will automatically be part of that order. Whenever a global index is created, this patch utilizes the interfaces from the previous patch to assign a partitionID to each leaf relation of the partitioned table on which the global index is being created. It then inserts the (indexid, partitionID) -> reloid mapping into the pg_index_partitions catalog for each leaf relation. Additionally, we need to create this mapping whenever a partition is attached. Specifically, we must create mappings for all leaf partitions of the table being attached, corresponding to the global indexes on the parent and all its ancestors under which the new partition is being attached. 
Similarly, when a partition is detached, we must invalidate the mappings for all leaf partitions under the detached partition related to the global indexes on all ancestors from which the partition is being detached. Open Items: - Rebuilding the global indexes when truncating or reindexing the relation. Currently, global indexes are reindexed once for each partition, whereas it should be done only once. - Currently the partition ID column is treated as a special column; Robert suggested implementing it as an expression. - Vacuum processes global indexes multiple times, once for each leaf partition; this needs to be optimized. - RelationGetIndexList does not follow the locking order (parent to child): because the child might already be locked, this can cause a deadlock and needs to be fixed. --- contrib/pg_overexplain/pg_overexplain.c | 3 + src/backend/access/common/reloptions.c | 1 + src/backend/access/heap/heapam.c | 6 +- src/backend/access/index/genam.c | 9 +- src/backend/access/index/indexam.c | 3 +- src/backend/access/nbtree/nbtdedup.c | 45 ++- src/backend/access/nbtree/nbtinsert.c | 190 ++++++++-- src/backend/access/nbtree/nbtpage.c | 84 ++++- src/backend/access/nbtree/nbtree.c | 18 + src/backend/access/nbtree/nbtsort.c | 84 ++++- src/backend/access/table/table.c | 1 + src/backend/bootstrap/bootparse.y | 2 + src/backend/catalog/aclchk.c | 5 +- src/backend/catalog/dependency.c | 3 +- src/backend/catalog/heap.c | 25 +- src/backend/catalog/index.c | 219 ++++++++--- src/backend/catalog/namespace.c | 1 + src/backend/catalog/objectaddress.c | 11 +- src/backend/catalog/partition.c | 6 +- src/backend/catalog/pg_class.c | 2 + src/backend/catalog/pg_index_partitions.c | 48 ++- src/backend/catalog/toasting.c | 2 +- src/backend/commands/analyze.c | 55 ++- src/backend/commands/cluster.c | 5 + src/backend/commands/indexcmds.c | 197 ++++++++-- src/backend/commands/tablecmds.c | 430 ++++++++++++++++++++-- src/backend/commands/vacuum.c | 9 +- src/backend/executor/execIndexing.c | 15 +- 
src/backend/optimizer/util/plancat.c | 9 + src/backend/parser/gram.y | 21 +- src/backend/parser/parse_utilcmd.c | 10 + src/backend/statistics/stat_utils.c | 5 + src/backend/tcop/utility.c | 12 +- src/backend/utils/adt/amutils.c | 3 +- src/backend/utils/adt/ruleutils.c | 11 + src/backend/utils/cache/lsyscache.c | 21 ++ src/backend/utils/cache/relcache.c | 37 +- src/bin/pg_dump/pg_dump.c | 1 + src/bin/psql/describe.c | 15 +- src/include/access/nbtree.h | 64 +++- src/include/access/tableam.h | 20 + src/include/catalog/index.h | 13 +- src/include/catalog/pg_class.h | 10 +- src/include/catalog/pg_index_partitions.h | 24 ++ src/include/commands/defrem.h | 1 + src/include/commands/tablecmds.h | 3 +- src/include/nodes/execnodes.h | 10 + src/include/nodes/parsenodes.h | 1 + src/include/utils/lsyscache.h | 1 + src/include/utils/rel.h | 17 +- 50 files changed, 1575 insertions(+), 213 deletions(-) diff --git a/contrib/pg_overexplain/pg_overexplain.c b/contrib/pg_overexplain/pg_overexplain.c index de824566f8..39c502d721 100644 --- a/contrib/pg_overexplain/pg_overexplain.c +++ b/contrib/pg_overexplain/pg_overexplain.c @@ -522,6 +522,9 @@ overexplain_range_table(PlannedStmt *plannedstmt, ExplainState *es) case RELKIND_PARTITIONED_INDEX: relkind = "partitioned_index"; break; + case RELKIND_GLOBAL_INDEX: + relkind = "global_index"; + break; case '\0': relkind = NULL; break; diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 50747c1639..fb4ace1bb6 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -1429,6 +1429,7 @@ extractRelOptions(HeapTuple tuple, TupleDesc tupdesc, break; case RELKIND_INDEX: case RELKIND_PARTITIONED_INDEX: + case RELKIND_GLOBAL_INDEX: options = index_reloptions(amoptions, datum, false); break; case RELKIND_FOREIGN_TABLE: diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 0dcd6ee817..e8cabea93a 100644 --- 
a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -4259,7 +4259,8 @@ check_lock_if_inplace_updateable_rel(Relation relation, else dbid = MyDatabaseId; - if (classForm->relkind == RELKIND_INDEX) + if (classForm->relkind == RELKIND_INDEX || + classForm->relkind == RELKIND_GLOBAL_INDEX) { Relation irel = index_open(relid, AccessShareLock); @@ -4313,7 +4314,8 @@ check_inplace_rel_lock(HeapTuple oldtup) else dbid = MyDatabaseId; - if (classForm->relkind == RELKIND_INDEX) + if (classForm->relkind == RELKIND_INDEX || + classForm->relkind == RELKIND_GLOBAL_INDEX) { Relation irel = index_open(relid, AccessShareLock); diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c index 0cb27af131..c2b80669aa 100644 --- a/src/backend/access/index/genam.c +++ b/src/backend/access/index/genam.c @@ -187,7 +187,14 @@ BuildIndexValueDescription(Relation indexRelation, Oid indrelid; AclResult aclresult; - indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation); + /* + * For global index skip the partitionID attribute while describing the + * index values. 
+ */ + if (RelationIsGlobalIndex(indexRelation)) + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation) - 1; + else + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation); /* * Check permissions- if the user does not have access to view all of the diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 219df1971d..3aa1fc92df 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -197,7 +197,8 @@ static inline void validate_relation_kind(Relation r) { if (r->rd_rel->relkind != RELKIND_INDEX && - r->rd_rel->relkind != RELKIND_PARTITIONED_INDEX) + r->rd_rel->relkind != RELKIND_PARTITIONED_INDEX && + r->rd_rel->relkind != RELKIND_GLOBAL_INDEX) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not an index", diff --git a/src/backend/access/nbtree/nbtdedup.c b/src/backend/access/nbtree/nbtdedup.c index 08884116ae..bdd085c8cc 100644 --- a/src/backend/access/nbtree/nbtdedup.c +++ b/src/backend/access/nbtree/nbtdedup.c @@ -20,8 +20,10 @@ #include "miscadmin.h" #include "utils/rel.h" -static void _bt_bottomupdel_finish_pending(Page page, BTDedupState state, - TM_IndexDeleteOp *delstate); +static void _bt_bottomupdel_finish_pending(Relation rel, Page page, + BTDedupState state, + TM_IndexDeleteOp *delstate, + PartidDeltidMapping *mapping); static bool _bt_do_singleval(Relation rel, Page page, BTDedupState state, OffsetNumber minoff, IndexTuple newitem); static void _bt_singleval_fillfactor(Page page, BTDedupState state, @@ -315,6 +317,7 @@ _bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel, BTDedupState state; TM_IndexDeleteOp delstate; bool neverdedup; + PartidDeltidMapping *mapping; int nkeyatts = IndexRelationGetNumberOfKeyAttributes(rel); /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */ @@ -334,6 +337,9 @@ _bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel, state->phystupsize = 0; state->nintervals = 0; 
+ /* Allocate memory for partittion id to deleted tid array mapping. */ + mapping = palloc(MaxTIDsPerBTreePage * sizeof(PartidDeltidMapping)); + /* * Initialize tableam state that describes bottom-up index deletion * operation. @@ -382,14 +388,15 @@ _bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel, else { /* Finalize interval -- move its TIDs to delete state */ - _bt_bottomupdel_finish_pending(page, state, &delstate); + _bt_bottomupdel_finish_pending(rel, page, state, &delstate, + mapping); /* itup starts new pending interval */ _bt_dedup_start_pending(state, itup, offnum); } } /* Finalize final interval -- move its TIDs to delete state */ - _bt_bottomupdel_finish_pending(page, state, &delstate); + _bt_bottomupdel_finish_pending(rel, page, state, &delstate, mapping); /* * We don't give up now in the event of having few (or even zero) @@ -407,7 +414,7 @@ _bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel, pfree(state); /* Ask tableam which TIDs are deletable, then physically delete them */ - _bt_delitems_delete_check(rel, buf, heapRel, &delstate); + _bt_delitems_delete_check(rel, buf, heapRel, &delstate, mapping); pfree(delstate.deltids); pfree(delstate.status); @@ -645,10 +652,12 @@ _bt_dedup_finish_pending(Page newpage, BTDedupState state) * deletion operations. 
*/ static void -_bt_bottomupdel_finish_pending(Page page, BTDedupState state, - TM_IndexDeleteOp *delstate) +_bt_bottomupdel_finish_pending(Relation rel, Page page, BTDedupState state, + TM_IndexDeleteOp *delstate, + PartidDeltidMapping *mapping) { bool dupinterval = (state->nitems > 1); + PartitionId partid = InvalidPartitionId; Assert(state->nitems > 0); Assert(state->nitems <= state->nhtids); @@ -662,6 +671,20 @@ _bt_bottomupdel_finish_pending(Page page, BTDedupState state, TM_IndexDelete *ideltid = &delstate->deltids[delstate->ndeltids]; TM_IndexStatus *istatus = &delstate->status[delstate->ndeltids]; + /* + * A global index stored tids from multiple partitions so we also need + * reloid along with tid to uniquely identifying the tuple. We don't + * need to convert partitionID to reloid for every item because we do + * not deduplicate across partitionID, i.e. all items in BTDedupState + * must belong to same partitionID. + */ + if (!PartIdIsValid(partid) && RelationIsGlobalIndex(rel)) + partid = BTreeTupleGetPartitionId(rel, itup); + + /* All IndexTuple in the state must be having same partitionID */ + Assert(!RelationIsGlobalIndex(rel) || + partid == BTreeTupleGetPartitionId(rel, itup)); + if (!BTreeTupleIsPosting(itup)) { /* Simple case: A plain non-pivot tuple */ @@ -672,6 +695,9 @@ _bt_bottomupdel_finish_pending(Page page, BTDedupState state, istatus->promising = dupinterval; /* simple rule */ istatus->freespace = ItemIdGetLength(itemid) + sizeof(ItemIdData); + /* Create mapping entry. */ + mapping->partid = partid; + mapping->idx = delstate->ndeltids; delstate->ndeltids++; } else @@ -735,6 +761,11 @@ _bt_bottomupdel_finish_pending(Page page, BTDedupState state, ideltid++; istatus++; + + /* Create mapping entry. 
*/ + mapping->partid = partid; + mapping->idx = delstate->ndeltids; + delstate->ndeltids++; } } diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index aa82cede30..94baad3eee 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -17,6 +17,8 @@ #include "access/nbtree.h" #include "access/nbtxlog.h" +#include "access/relation.h" +#include "access/table.h" #include "access/transam.h" #include "access/xloginsert.h" #include "common/int.h" @@ -29,6 +31,17 @@ /* Minimum tree height for application of fastpath optimization */ #define BTREE_FASTPATH_MIN_LEVEL 2 +/* + * Table block information pointed to by LP_DEAD-set tuples in the index. + * For a global index, we also need the PartitionId along with the BlockNumber + * to determine which partition the block belongs to. This information is used + * during simple delete pass. + */ +typedef struct BTHeapBlockInfo +{ + PartitionId partid; + BlockNumber blockno; +} BTHeapBlockInfo; static BTStack _bt_search_insert(Relation rel, Relation heaprel, BTInsertState insertstate); @@ -70,10 +83,12 @@ static void _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, OffsetNumber *deletable, int ndeletable, IndexTuple newitem, OffsetNumber minoff, OffsetNumber maxoff); -static BlockNumber *_bt_deadblocks(Page page, OffsetNumber *deletable, - int ndeletable, IndexTuple newitem, - int *nblocks); +static BTHeapBlockInfo* _bt_deadblocks(Relation rel, Page page, + OffsetNumber *deletable, + int ndeletable, IndexTuple newitem, + int *nblocks); static inline int _bt_blk_cmp(const void *arg1, const void *arg2); +static inline int _bt_indexdel_cmp(const void *arg1, const void *arg2); /* * _bt_doinsert() -- Handle insertion of a single index tuple in the tree. 
@@ -135,6 +150,13 @@ _bt_doinsert(Relation rel, IndexTuple itup, Assert(checkUnique != UNIQUE_CHECK_EXISTING); is_unique = true; } + + /* + * Ignore the PartitionId attribute for the global indexes until the + * uniqueness established. + */ + if (RelationIsGlobalIndex(rel)) + itup_key->keysz--; } /* @@ -235,6 +257,13 @@ search: /* Uniqueness is established -- restore heap tid as scantid */ if (itup_key->heapkeyspace) itup_key->scantid = &itup->t_tid; + + /* + * Uniqueness is established -- consider the PartitionId for + * (heapkeyspace). + */ + if (RelationIsGlobalIndex(rel)) + itup_key->keysz++; } if (checkUnique != UNIQUE_CHECK_EXISTING) @@ -418,11 +447,13 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, OffsetNumber maxoff; Page page; BTPageOpaque opaque; + Relation partrel = heapRel; Buffer nbuf = InvalidBuffer; bool found = false; bool inposting = false; bool prevalldead = true; int curposti = 0; + Oid heapoid = RelationGetRelid(heapRel); /* Assume unique until we find a duplicate */ *is_unique = true; @@ -540,6 +571,28 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, htid = *BTreeTupleGetPostingN(curitup, curposti); } + /* + * For a global indexes, we need to obtain the exact partition + * heap relation corresponding to the partition ID stored + * inside the index tuple. + */ + if (RelationIsGlobalIndex(rel)) + { + Oid curheapoid = BTreeTupleGetPartitionRelid(rel, curitup); + + if (heapoid != curheapoid) + { + if (heapoid != RelationGetRelid(heapRel)) + { + Assert(partrel != NULL); + relation_close(partrel, NoLock); + } + + partrel = relation_open(curheapoid, NoLock); + heapoid = curheapoid; + } + } + /* * If we are doing a recheck, we expect to find the tuple we * are rechecking. 
It's not a duplicate, but we have to keep @@ -557,7 +610,7 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, * with optimizations like heap's HOT, we have just a single * index entry for the entire chain. */ - else if (table_index_fetch_tuple_check(heapRel, &htid, + else if (table_index_fetch_tuple_check(partrel, &htid, &SnapshotDirty, &all_dead)) { @@ -576,6 +629,15 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, if (nbuf != InvalidBuffer) _bt_relbuf(rel, nbuf); *is_unique = false; + + /* + * Close the partrel if this is not same as the heapRel + * passed by the caller. Caller is responsible for + * closing the input heapRel. + */ + if (partrel && partrel != heapRel) + table_close(partrel, NoLock); + return InvalidTransactionId; } @@ -594,6 +656,15 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, *speculativeToken = SnapshotDirty.speculativeToken; /* Caller releases lock on buf immediately */ insertstate->bounds_valid = false; + + /* + * Close the partrel if this is not same as the heapRel + * passed by the caller. Caller is responsible for + * closing the input heapRel. + */ + if (partrel && partrel != heapRel) + table_close(partrel, NoLock); + return xwait; } @@ -669,7 +740,7 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, RelationGetRelationName(rel)), key_desc ? errdetail("Key %s already exists.", key_desc) : 0, - errtableconstraint(heapRel, + errtableconstraint(partrel, RelationGetRelationName(rel)))); } } @@ -751,6 +822,13 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, } } + /* + * Close the partrel if this is not same as the heapRel passed by the + * caller. Caller is responsible for closing the input heapRel. + */ + if (partrel && partrel != heapRel) + table_close(partrel, NoLock); + /* * If we are doing a recheck then we should have found the tuple we are * checking. 
Otherwise there's something very wrong --- probably, the @@ -762,7 +840,7 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, errmsg("failed to re-find tuple within index \"%s\"", RelationGetRelationName(rel)), errhint("This may be because of a non-immutable index expression."), - errtableconstraint(heapRel, + errtableconstraint(partrel, RelationGetRelationName(rel)))); if (nbuf != InvalidBuffer) @@ -2814,13 +2892,14 @@ _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, OffsetNumber minoff, OffsetNumber maxoff) { Page page = BufferGetPage(buffer); - BlockNumber *deadblocks; + BTHeapBlockInfo *deadblocks; int ndeadblocks; TM_IndexDeleteOp delstate; OffsetNumber offnum; + PartidDeltidMapping *mapping; /* Get array of table blocks pointed to by LP_DEAD-set tuples */ - deadblocks = _bt_deadblocks(page, deletable, ndeletable, newitem, + deadblocks = _bt_deadblocks(rel, page, deletable, ndeletable, newitem, &ndeadblocks); /* Initialize tableam state that describes index deletion operation */ @@ -2832,6 +2911,9 @@ _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, delstate.deltids = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexDelete)); delstate.status = palloc(MaxTIDsPerBTreePage * sizeof(TM_IndexStatus)); + /* Allocate memory for partittion id to deleted tid array mapping. 
*/ + mapping = palloc(MaxTIDsPerBTreePage * sizeof(PartidDeltidMapping)); + for (offnum = minoff; offnum <= maxoff; offnum = OffsetNumberNext(offnum)) @@ -2840,14 +2922,16 @@ _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, IndexTuple itup = (IndexTuple) PageGetItem(page, itemid); TM_IndexDelete *odeltid = &delstate.deltids[delstate.ndeltids]; TM_IndexStatus *ostatus = &delstate.status[delstate.ndeltids]; - BlockNumber tidblock; + BTHeapBlockInfo tidblock; void *match; if (!BTreeTupleIsPosting(itup)) { - tidblock = ItemPointerGetBlockNumber(&itup->t_tid); + tidblock.blockno = ItemPointerGetBlockNumber(&itup->t_tid); + tidblock.partid = (RelationIsGlobalIndex(rel)) ? + BTreeTupleGetPartitionId(rel, itup) : InvalidOid; match = bsearch(&tidblock, deadblocks, ndeadblocks, - sizeof(BlockNumber), _bt_blk_cmp); + sizeof(BTHeapBlockInfo), _bt_blk_cmp); if (!match) { @@ -2866,19 +2950,26 @@ _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, ostatus->promising = false; /* unused */ ostatus->freespace = 0; /* unused */ + /* Create mapping entry. */ + mapping->partid = tidblock.partid; + mapping->idx = delstate.ndeltids; delstate.ndeltids++; } else { int nitem = BTreeTupleGetNPosting(itup); + PartitionId partid = (RelationIsGlobalIndex(rel)) ? + BTreeTupleGetPartitionId(rel, itup) : InvalidOid; for (int p = 0; p < nitem; p++) { ItemPointer tid = BTreeTupleGetPostingN(itup, p); - tidblock = ItemPointerGetBlockNumber(tid); + tidblock.blockno = ItemPointerGetBlockNumber(tid); + tidblock.partid = partid; + match = bsearch(&tidblock, deadblocks, ndeadblocks, - sizeof(BlockNumber), _bt_blk_cmp); + sizeof(BTHeapBlockInfo), _bt_blk_cmp); if (!match) { @@ -2899,6 +2990,10 @@ _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, odeltid++; ostatus++; + + /* Create mapping entry. 
*/ + mapping->partid = tidblock.partid; + mapping->idx = delstate.ndeltids; delstate.ndeltids++; } } @@ -2909,7 +3004,7 @@ _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, Assert(delstate.ndeltids >= ndeletable); /* Physically delete LP_DEAD tuples (plus any delete-safe extra TIDs) */ - _bt_delitems_delete_check(rel, buffer, heapRel, &delstate); + _bt_delitems_delete_check(rel, buffer, heapRel, &delstate, mapping); pfree(delstate.deltids); pfree(delstate.status); @@ -2923,6 +3018,16 @@ _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, * block from incoming newitem just in case it isn't among the LP_DEAD-related * table blocks. * + * For global indexes, we need the relation OID along with block numbers to + * uniquely identify the block. Therefore, we return the output in the form of + * a partition ID and block number pair and will convert the partition ID to + * the relation OID whenever we need to access the heap. While we could + * convert to the relation OID here and store it directly, this conversion + * might need to be done multiple times. So, we choose to convert when we + * really need to access the heap. Before accessing the heap, we first sort + * them in partition ID order, so the conversion from partition ID to relation + * OID only needs to be done once per partition. + * * Always counting the newitem's table block as an LP_DEAD related block makes * sense because the cost is consistently low; it is practically certain that * the table block will not incur a buffer miss in tableam. On the other hand @@ -2934,13 +3039,14 @@ _bt_simpledel_pass(Relation rel, Buffer buffer, Relation heapRel, * * Returns final array, and sets *nblocks to its final size for caller. 
*/ -static BlockNumber * -_bt_deadblocks(Page page, OffsetNumber *deletable, int ndeletable, - IndexTuple newitem, int *nblocks) +static BTHeapBlockInfo * +_bt_deadblocks(Relation rel, Page page, OffsetNumber *deletable, + int ndeletable, IndexTuple newitem, int *nblocks) { int spacentids, ntids; - BlockNumber *tidblocks; + bool isglobalidx = RelationIsGlobalIndex(rel); + BTHeapBlockInfo *tidblocks; /* * Accumulate each TID's block in array whose initial size has space for @@ -2950,7 +3056,7 @@ _bt_deadblocks(Page page, OffsetNumber *deletable, int ndeletable, */ spacentids = ndeletable + 1; ntids = 0; - tidblocks = (BlockNumber *) palloc(sizeof(BlockNumber) * spacentids); + tidblocks = (BTHeapBlockInfo *) palloc(sizeof(BTHeapBlockInfo) * spacentids); /* * First add the table block for the incoming newitem. This is the one @@ -2958,7 +3064,15 @@ _bt_deadblocks(Page page, OffsetNumber *deletable, int ndeletable, * any known deletable items. */ Assert(!BTreeTupleIsPosting(newitem) && !BTreeTupleIsPivot(newitem)); - tidblocks[ntids++] = ItemPointerGetBlockNumber(&newitem->t_tid); + + /* + * Store PartitionId and BlockNumber of the deletable item. For non-global + * indexes, just store InvalidPartitionId, as it is never going to be + * accessed. + */ + tidblocks[ntids].partid = isglobalidx ? + BTreeTupleGetPartitionId(rel, newitem) : InvalidPartitionId; + tidblocks[ntids++].blockno = ItemPointerGetBlockNumber(&newitem->t_tid); for (int i = 0; i < ndeletable; i++) { @@ -2972,34 +3086,41 @@ _bt_deadblocks(Page page, OffsetNumber *deletable, int ndeletable, if (ntids + 1 > spacentids) { spacentids *= 2; - tidblocks = (BlockNumber *) - repalloc(tidblocks, sizeof(BlockNumber) * spacentids); + tidblocks = (BTHeapBlockInfo *) + repalloc(tidblocks, sizeof(BTHeapBlockInfo) * spacentids); } - tidblocks[ntids++] = ItemPointerGetBlockNumber(&itup->t_tid); + /* Store PartitionId and BlockNumber of the deletable item. */ + tidblocks[ntids].partid = isglobalidx ? 
+ BTreeTupleGetPartitionId(rel, itup) : InvalidPartitionId; + tidblocks[ntids++].blockno = + ItemPointerGetBlockNumber(&itup->t_tid); } else { int nposting = BTreeTupleGetNPosting(itup); + PartitionId partid = isglobalidx ? + BTreeTupleGetPartitionId(rel, itup) : InvalidPartitionId; if (ntids + nposting > spacentids) { spacentids = Max(spacentids * 2, ntids + nposting); - tidblocks = (BlockNumber *) - repalloc(tidblocks, sizeof(BlockNumber) * spacentids); + tidblocks = (BTHeapBlockInfo *) + repalloc(tidblocks, sizeof(BTHeapBlockInfo) * spacentids); } for (int j = 0; j < nposting; j++) { ItemPointer tid = BTreeTupleGetPostingN(itup, j); - tidblocks[ntids++] = ItemPointerGetBlockNumber(tid); + tidblocks[ntids].partid = partid; + tidblocks[ntids++].blockno = ItemPointerGetBlockNumber(tid); } } } - qsort(tidblocks, ntids, sizeof(BlockNumber), _bt_blk_cmp); - *nblocks = qunique(tidblocks, ntids, sizeof(BlockNumber), _bt_blk_cmp); + qsort(tidblocks, ntids, sizeof(BTHeapBlockInfo), _bt_blk_cmp); + *nblocks = qunique(tidblocks, ntids, sizeof(BTHeapBlockInfo), _bt_blk_cmp); return tidblocks; } @@ -3010,8 +3131,15 @@ _bt_deadblocks(Page page, OffsetNumber *deletable, int ndeletable, static inline int _bt_blk_cmp(const void *arg1, const void *arg2) { - BlockNumber b1 = *((BlockNumber *) arg1); - BlockNumber b2 = *((BlockNumber *) arg2); + BTHeapBlockInfo *b1 = ((BTHeapBlockInfo *) arg1); + BTHeapBlockInfo *b2 = ((BTHeapBlockInfo *) arg2); + int res; - return pg_cmp_u32(b1, b2); + /* + * First compare partids if they are same then compare the block numbers. 
+ */ + res = pg_cmp_u32(b1->partid, b2->partid); + if (res == 0) + res = pg_cmp_u32(b1->blockno, b2->blockno); + return res; } diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index c79dd38ee1..08505cd262 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -24,6 +24,7 @@ #include "access/nbtree.h" #include "access/nbtxlog.h" +#include "access/table.h" #include "access/tableam.h" #include "access/transam.h" #include "access/xlog.h" @@ -1509,9 +1510,9 @@ _bt_delitems_cmp(const void *a, const void *b) * field (tableam will sort deltids for its own reasons, so we'll need to put * it back in leaf-page-wise order afterwards). */ -void -_bt_delitems_delete_check(Relation rel, Buffer buf, Relation heapRel, - TM_IndexDeleteOp *delstate) +static void +_bt_delitems_delete_check_guts(Relation rel, Buffer buf, Relation heapRel, + TM_IndexDeleteOp *delstate) { Page page = BufferGetPage(buf); TransactionId snapshotConflictHorizon; @@ -1678,6 +1679,83 @@ _bt_delitems_delete_check(Relation rel, Buffer buf, Relation heapRel, pfree(updatable[i]); } +/* + * Try to delete item(s) from a btree leaf page during single-page cleanup. + * + * Refer to the detailed comments in '_bt_delitems_delete_check_guts' for more + * information. This function serves as a wrapper to handle the case of a + * global index, where we might have TIDs from multiple partitions. It calls + * the core functionality for each heap relation corresponding to each + * partition. + */ +void +_bt_delitems_delete_check(Relation rel, Buffer buf, Relation heapRel, + TM_IndexDeleteOp *delstate, + PartidDeltidMapping *mapping) +{ + /* + * For global index we need to delete the items for each partition + * separately. 
+ */ + if (RelationIsGlobalIndex(rel)) + { + int ndeltid; + int starttid = 0; + Oid prevpartid = InvalidPartitionId; + TM_IndexDeleteOp partdelstate = *delstate; + + /* + * Sort the mapping array in partittion id order so that we avoid + * calling tableAM for same relation multiple times. + */ + qsort(mapping, delstate->ndeltids, sizeof(PartidDeltidMapping), + _bt_indexdel_cmp); + + for (ndeltid = 0; ndeltid < delstate->ndeltids; ndeltid++) + { + + /* + * If ndeltid is not same as the index present in the mapping then + * swap it with the correct entry. + */ + if (mapping[ndeltid].idx != ndeltid) + { + int idx = mapping[ndeltid].idx; + TM_IndexDelete tmp = delstate->deltids[idx]; + + delstate->deltids[idx] = delstate->deltids[ndeltid]; + delstate->deltids[ndeltid] = tmp; + } + + /* + * If this item belong to a different PartitionID mean we need to + * process delete for all the items of the previous PartitionID. + * Also if this is the last item then we need to process all the + * items of the last PartitionId. + */ + if (PartIdIsValid(prevpartid) && + (mapping[0].partid != prevpartid || + ndeltid == delstate->ndeltids - 1)) + { + Oid reloid = IndexGetPartitionReloid(rel, prevpartid); + Relation childRel = table_open(reloid, AccessShareLock); + + partdelstate.deltids = &delstate->deltids[starttid]; + partdelstate.ndeltids = ndeltid - starttid; + + _bt_delitems_delete_check_guts(rel, buf, childRel, + &partdelstate); + starttid = ndeltid; + table_close(childRel, AccessShareLock); + } + + prevpartid = mapping[ndeltid].partid; + } + } + else + _bt_delitems_delete_check_guts(rel, buf, heapRel, delstate); +} + /* * Check that leftsib page (the btpo_prev of target page) is not marked with * INCOMPLETE_SPLIT flag. Used during page deletion. 
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index fdff960c13..c3960784eb 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -1505,6 +1505,16 @@ backtrack: nhtidslive = 0; if (callback) { + PartitionId partid = InvalidPartitionId; + + /* + * If this is a global index then get the partition id for the + * heap relation being vacuum so that we only call the callback + * functions for the index tuple which belong to this partition. + */ + if (RelationIsGlobalIndex(rel) && !PartIdIsValid(partid)) + partid = IndexGetRelationPartitionId(rel, RelationGetRelid(heaprel)); + /* btbulkdelete callback tells us what to delete (or update) */ for (offnum = minoff; offnum <= maxoff; @@ -1515,6 +1525,14 @@ backtrack: itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); + /* + * For global index only call the callback for the heap + * relation which is being vacuumed; + */ + if (RelationIsGlobalIndex(rel) && + BTreeTupleGetPartitionId(rel, itup) != partid) + continue; + Assert(!BTreeTupleIsPivot(itup)); if (!BTreeTupleIsPosting(itup)) { diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 9d70e89c1f..bba47dc969 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -46,9 +46,11 @@ #include "access/table.h" #include "access/xact.h" #include "catalog/index.h" +#include "catalog/pg_inherits.h" #include "commands/progress.h" #include "executor/instrument.h" #include "miscadmin.h" +#include "partitioning/partdesc.h" #include "pgstat.h" #include "storage/bulk_write.h" #include "tcop/tcopprot.h" @@ -286,7 +288,9 @@ static void _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, BTShared *btshared, Sharedsort *sharedsort, Sharedsort *sharedsort2, int sortmem, bool progress); - +static double _bt_spool_scan_partitions(IndexInfo *indexInfo, Relation rel, + BTBuildState *buildstate, + Relation irel); /* * 
btbuild() -- build a new btree index. @@ -350,6 +354,68 @@ btbuild(Relation heap, Relation index, IndexInfo *indexInfo) return result; } +/* + * Wrapper that runs table_index_build_scan() on every leaf partition of + * 'rel' for global index 'irel', returning the sum of the per-scan results. + */ +static double +_bt_spool_scan_partitions(IndexInfo *indexInfo, Relation rel, + BTBuildState *buildstate, Relation irel) +{ + double reltuples = 0; + List *tableIds; + + Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); + Assert(RelationIsGlobalIndex(irel)); + + /* + * To retrieve the tuples from all leaf relations, we need to obtain a list + * of all inheritor relations. This operation should only be performed + * during the creation of an index, reindexing, or truncating a relation. + * In these cases, when a global index is involved, the caller already + * holds the necessary locks on all inheritor relations. Therefore, we can + * safely proceed with NoLock in this context. + */ + tableIds = find_all_inheritors(RelationGetRelid(rel), NoLock, NULL); + + foreach_oid(tableOid, tableIds) + { + Relation childrel = table_open(tableOid, NoLock); + double curreltuples; + + /* + * Only leaf relations hold data, so every other inheritor is skipped. + */ + if (childrel->rd_rel->relkind != RELKIND_RELATION) + { + table_close(childrel, NoLock); + continue; + } + + /* + * Look up this partition's id with respect to the global index + * and stash it in indexInfo->ii_partid for the scan below. + */ + indexInfo->ii_partid = IndexGetRelationPartitionId(irel, tableOid); + curreltuples = table_index_build_scan(childrel, irel, indexInfo, true, + true, _bt_build_callback, + (void *) buildstate, NULL); + reltuples += curreltuples; + + /* + * Update this partition's stats here while building the global + * index, because only at this point do we know the tuple count + * of each individual partition.
+ */ + index_update_stats(childrel, true, true, curreltuples); + table_close(childrel, NoLock); + } + + list_free(tableIds); + + return reltuples; +} + /* * Create and initialize one or two spool structures, and save them in caller's * buildstate argument. May also fill-in fields within indexInfo used by index @@ -474,9 +540,19 @@ _bt_spools_heapscan(Relation heap, Relation index, BTBuildState *buildstate, /* Fill spool using either serial or parallel heap scan */ if (!buildstate->btleader) - reltuples = table_index_build_scan(heap, index, indexInfo, true, true, - _bt_build_callback, buildstate, - NULL); + { + /* + * If we are building a global index then we need to scan all the + * child partitions and insert into the global index. + */ + if (RelationIsGlobalIndex(index)) + reltuples = _bt_spool_scan_partitions(indexInfo, heap, + buildstate, index); + else + reltuples = table_index_build_scan(heap, index, indexInfo, true, + true, _bt_build_callback, + buildstate, NULL); + } else reltuples = _bt_parallel_heapscan(buildstate, &indexInfo->ii_BrokenHotChain); diff --git a/src/backend/access/table/table.c b/src/backend/access/table/table.c index be698bba0e..8243a565f9 100644 --- a/src/backend/access/table/table.c +++ b/src/backend/access/table/table.c @@ -139,6 +139,7 @@ validate_relation_kind(Relation r) { if (r->rd_rel->relkind == RELKIND_INDEX || r->rd_rel->relkind == RELKIND_PARTITIONED_INDEX || + r->rd_rel->relkind == RELKIND_GLOBAL_INDEX || r->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), diff --git a/src/backend/bootstrap/bootparse.y b/src/backend/bootstrap/bootparse.y index 9833f52c1b..2393ed45ff 100644 --- a/src/backend/bootstrap/bootparse.y +++ b/src/backend/bootstrap/bootparse.y @@ -313,6 +313,7 @@ Boot_DeclareIndexStmt: $4, InvalidOid, InvalidOid, + NIL, -1, false, false, @@ -366,6 +367,7 @@ Boot_DeclareUniqueIndexStmt: $5, InvalidOid, InvalidOid, + NIL, -1, false, false, diff --git
a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index 9ca8a88dc9..6584202525 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -1800,7 +1800,8 @@ ExecGrant_Relation(InternalGrant *istmt) /* Not sensible to grant on an index */ if (pg_class_tuple->relkind == RELKIND_INDEX || - pg_class_tuple->relkind == RELKIND_PARTITIONED_INDEX) + pg_class_tuple->relkind == RELKIND_PARTITIONED_INDEX || + pg_class_tuple->relkind == RELKIND_GLOBAL_INDEX) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is an index", @@ -4364,6 +4365,7 @@ recordExtObjInitPriv(Oid objoid, Oid classoid) */ if (pg_class_tuple->relkind == RELKIND_INDEX || pg_class_tuple->relkind == RELKIND_PARTITIONED_INDEX || + pg_class_tuple->relkind == RELKIND_GLOBAL_INDEX || pg_class_tuple->relkind == RELKIND_COMPOSITE_TYPE) { ReleaseSysCache(tuple); @@ -4524,6 +4526,7 @@ removeExtObjInitPriv(Oid objoid, Oid classoid) */ if (pg_class_tuple->relkind == RELKIND_INDEX || pg_class_tuple->relkind == RELKIND_PARTITIONED_INDEX || + pg_class_tuple->relkind == RELKIND_GLOBAL_INDEX || pg_class_tuple->relkind == RELKIND_COMPOSITE_TYPE) { ReleaseSysCache(tuple); diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 7dded634eb..2db9d847e0 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -1358,7 +1358,8 @@ doDeletion(const ObjectAddress *object, int flags) char relKind = get_rel_relkind(object->objectId); if (relKind == RELKIND_INDEX || - relKind == RELKIND_PARTITIONED_INDEX) + relKind == RELKIND_PARTITIONED_INDEX || + relKind == RELKIND_GLOBAL_INDEX) { bool concurrent = ((flags & PERFORM_DELETION_CONCURRENTLY) != 0); bool concurrent_lock_mode = ((flags & PERFORM_DELETION_CONCURRENT_LOCK) != 0); diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index fd6537567e..19f27f8f2b 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1221,6 +1221,7 @@ 
heap_create_with_catalog(const char *relname, */ Assert(relkind != RELKIND_INDEX); Assert(relkind != RELKIND_PARTITIONED_INDEX); + Assert(relkind != RELKIND_GLOBAL_INDEX); if (relkind == RELKIND_TOASTVALUE) { @@ -1338,7 +1339,8 @@ heap_create_with_catalog(const char *relname, if (!(relkind == RELKIND_SEQUENCE || relkind == RELKIND_TOASTVALUE || relkind == RELKIND_INDEX || - relkind == RELKIND_PARTITIONED_INDEX)) + relkind == RELKIND_PARTITIONED_INDEX || + relkind == RELKIND_GLOBAL_INDEX)) { Oid new_array_oid; ObjectAddress new_type_addr; @@ -1875,6 +1877,24 @@ heap_drop_with_catalog(Oid relid) if (relid == defaultPartOid) update_default_partition_oid(parentOid, InvalidOid); + /* + * If leaf relation of a partitioned table is being drop then detach it + * from the global indexes i.e. remove all the mappings from + * pg_index_partition relation. We don't have any mapping for non-leaf + * relation so nothing to do for them. + */ + if (get_rel_relispartition(relid)) + { + List *indexids = RelationGetIndexList(rel); + List *reloids = list_make1_oid(relid); + + /* Detach the reloid from the global indexes. */ + DetachFromGlobalIndexes(indexids, reloids); + + list_free(indexids); + list_free(reloids); + } + /* * Schedule unlinking of the relation's physical files at commit. */ @@ -3534,6 +3554,9 @@ RemoveStatistics(Oid relid, AttrNumber attnum) * * The routine will truncate and then reconstruct the indexes on * the specified relation. Caller must hold exclusive lock on rel. + * + * TODO: Handle the global indexes, global indexes should not be rebuild while + * truncating each leaf relation. 
*/ static void RelationTruncateIndexes(Relation heapRelation) diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index aa216683b7..2bf9b59c9d 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -44,6 +44,7 @@ #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" #include "catalog/pg_description.h" +#include "catalog/pg_index_partitions.h" #include "catalog/pg_inherits.h" #include "catalog/pg_opclass.h" #include "catalog/pg_operator.h" @@ -62,6 +63,7 @@ #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" #include "parser/parser.h" +#include "partitioning/partdesc.h" #include "pgstat.h" #include "postmaster/autovacuum.h" #include "rewrite/rewriteManip.h" @@ -120,9 +122,6 @@ static void UpdateIndexRelation(Oid indexoid, Oid heapoid, bool immediate, bool isvalid, bool isready); -static void index_update_stats(Relation rel, - bool hasindex, - double reltuples); static void IndexCheckExclusion(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo); @@ -342,12 +341,24 @@ ConstructTupleDescriptor(Relation heapRelation, /* Simple index column */ const FormData_pg_attribute *from; - Assert(atnum > 0); /* should've been caught above */ + /* + * For global indexes along with the positive attribute number we + * can also get the PartitionIdAttributeNumber. + */ + Assert(atnum > 0 || atnum == PartitionIdAttributeNumber); if (atnum > natts) /* safety check */ elog(ERROR, "invalid column number %d", atnum); - from = TupleDescAttr(heapTupDesc, - AttrNumberGetAttrOffset(atnum)); + + /* + * If the attribute number is PartitionIdAttributeNumber then + * directly assign to the predefined partitionid_attr constant. 
+ */ + if (atnum == PartitionIdAttributeNumber) + from = &partitionid_attr; + else + from = TupleDescAttr(heapTupDesc, + AttrNumberGetAttrOffset(atnum)); to->atttypid = from->atttypid; to->attlen = from->attlen; @@ -719,6 +730,7 @@ UpdateIndexRelation(Oid indexoid, * allow_system_table_mods: allow table to be a system catalog * is_internal: if true, post creation hook for new index * constraintId: if not NULL, receives OID of created constraint + * inheritors: if not NIL, receives OIDs of all the inheritors * * Returns the OID of the created index. */ @@ -743,7 +755,8 @@ index_create(Relation heapRelation, bits16 constr_flags, bool allow_system_table_mods, bool is_internal, - Oid *constraintId) + Oid *constraintId, + List *inheritors) { Oid heapRelationId = RelationGetRelid(heapRelation); Relation pg_class; @@ -759,6 +772,7 @@ index_create(Relation heapRelation, bool invalid = (flags & INDEX_CREATE_INVALID) != 0; bool concurrent = (flags & INDEX_CREATE_CONCURRENT) != 0; bool partitioned = (flags & INDEX_CREATE_PARTITIONED) != 0; + bool global_index = (flags & INDEX_CREATE_GLOBAL) != 0; char relkind; TransactionId relfrozenxid; MultiXactId relminmxid; @@ -770,7 +784,13 @@ index_create(Relation heapRelation, /* partitioned indexes must never be "built" by themselves */ Assert(!partitioned || (flags & INDEX_CREATE_SKIP_BUILD)); - relkind = partitioned ? 
RELKIND_PARTITIONED_INDEX : RELKIND_INDEX; + if (global_index) + relkind = RELKIND_GLOBAL_INDEX; + else if (partitioned) + relkind = RELKIND_PARTITIONED_INDEX; + else + relkind = RELKIND_INDEX; + is_exclusion = (indexInfo->ii_ExclusionOps != NULL); pg_class = table_open(RelationRelationId, RowExclusiveLock); @@ -1051,10 +1071,34 @@ index_create(Relation heapRelation, !concurrent); /* - * Register relcache invalidation on the indexes' heap relation, to - * maintain consistency of its index list + * Create the mapping in pg_index_partitions table, also register relcache + * invalidation on the indexes' heap relation, to maintain consistency of + * its index list. If we are creating a global index then invalidate the + * relcache of all the inheritors as well. */ - CacheInvalidateRelcache(heapRelation); + if (global_index) + { + Assert(inheritors != NIL); + AttachParittionsToGlobalIndex(indexRelation, inheritors); + foreach_oid(tableOid, inheritors) + { + Relation childrel = table_open(tableOid, NoLock); + + CacheInvalidateRelcache(childrel); + table_close(childrel, NoLock); + } + + /* + * IndexPartitionInfo cache got built while we were inserting the tuple + * in system table so this might not be complete so clean this up and + * let it get build whenever needed. + * + * FIXME recheck whether we really need to do this? + */ + indexRelation->rd_indexpartinfo = NULL; + } + else + CacheInvalidateRelcache(heapRelation); /* update pg_inherits and the parent's relhassubclass, if needed */ if (OidIsValid(parentIndexRelid)) @@ -1268,7 +1312,7 @@ index_create(Relation heapRelation, * having an index. */ index_update_stats(heapRelation, - true, + true, false, -1.0); /* Make the above update visible */ CommandCounterIncrement(); @@ -1462,7 +1506,8 @@ index_concurrently_create_copy(Relation heapRelation, Oid oldIndexId, 0, true, /* allow table to be a system catalog? */ false, /* is_internal? 
*/ - NULL); + NULL, + NIL); /* Close the relations used and clean up */ index_close(indexRelation, NoLock); @@ -2127,6 +2172,7 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode) Relation indexRelation; HeapTuple tuple; bool hasexprs; + bool isglobal; LockRelId heaprelid, indexrelid; LOCKTAG heaplocktag; @@ -2319,6 +2365,9 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode) TransferPredicateLocksToHeapRelation(userIndexRelation); } + /* Remember whether it is a global index. */ + isglobal = RelationIsGlobalIndex(userIndexRelation); + /* * Schedule physical removal of the files (if any) */ @@ -2384,15 +2433,36 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode) */ DeleteInheritsTuple(indexId, InvalidOid, false, NULL); + /* + * Remove all the mapping present in pg_index_partitions table for this + * global index. + */ + if (isglobal) + DeleteIndexPartitionEntries(indexId); + /* * We are presently too lazy to attempt to compute the new correct value * of relhasindex (the next VACUUM will fix it if necessary). So there is * no need to update the pg_class tuple for the owning relation. But we * must send out a shared-cache-inval notice on the owning relation to * ensure other backends update their relcache lists of indexes. (In the - * concurrent case, this is redundant but harmless.) + * concurrent case, this is redundant but harmless.). If we are dropping a + * global index then invalidate the relcache of all the inheritors as well. */ - CacheInvalidateRelcache(userHeapRelation); + if (isglobal) + { + /* + * Pass lockmode as NoLock because caller should already hold the lock + * on all the partitions. Check code in RemoveRelations(). 
+ */ + List *tableIds = find_all_inheritors(heapId, NoLock, NULL); + + foreach_oid(tableOid, tableIds) + CacheInvalidateRelcacheByRelid(tableOid); + list_free(tableIds); + } + else + CacheInvalidateRelcache(userHeapRelation); /* * Close owning rel, but keep lock @@ -2753,7 +2823,16 @@ FormIndexDatum(IndexInfo *indexInfo, Datum iDatum; bool isNull; - if (keycol < 0) + /* + * If the attribute number is PartitionIdAttributeNumber then directly + * assign the value stored in indexInfo->ii_partid. + */ + if (keycol == PartitionIdAttributeNumber) + { + iDatum = indexInfo->ii_partid; + isNull = false; + } + else if (keycol < 0) iDatum = slot_getsysattr(slot, keycol, &isNull); else if (keycol != 0) { @@ -2805,9 +2884,10 @@ FormIndexDatum(IndexInfo *indexInfo, * index. When updating an index, it's important because some index AMs * expect a relcache flush to occur after REINDEX. */ -static void +void index_update_stats(Relation rel, bool hasindex, + bool hasglobalindex, double reltuples) { bool update_stats; @@ -2930,6 +3010,18 @@ index_update_stats(Relation rel, dirty = true; } + /* + * Set it to true if we have created global index and it is already not set + * to true. Afterward if we are creating some other index then input + * hasglobalindex would be false so we don't need to do anything in that + * case. + */ + if (hasglobalindex && !rd_rel->relhasglobalindex) + { + rd_rel->relhasglobalindex = hasglobalindex; + dirty = true; + } + if (update_stats) { if (rd_rel->relpages != (int32) relpages) @@ -2981,7 +3073,6 @@ index_update_stats(Relation rel, table_close(pg_class, RowExclusiveLock); } - /* * index_build - invoke access-method-specific index build procedure * @@ -3010,6 +3101,10 @@ index_build(Relation heapRelation, int save_sec_context; int save_nestlevel; + /* XXX Currently parallel build is not supported for global indexes. 
*/ + if (RelationIsGlobalIndex(indexRelation)) + parallel = false; + /* * sanity checks */ @@ -3150,14 +3245,26 @@ index_build(Relation heapRelation, } /* - * Update heap and index pg_class rows + * Update the pg_class rows for the heap and index. If this is a + * partitioned relation, we are building a global index, so just mark + * the relation as having an index. We have already updated the heap tuple + * stats for each leaf relation while processing each partition inside + * _bt_spool_scan_partitions(). + * + * TODO: We might choose to change the ambuild function to return an array + * of stats so that we can get separate stats for each partition. And + * then instead of setting stats in _bt_spool_scan_partitions() we can do + * that here because interface wise that would look cleaner. */ - index_update_stats(heapRelation, - true, - stats->heap_tuples); + if (heapRelation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + index_update_stats(heapRelation, true, true, -1.0); + else + index_update_stats(heapRelation, + true, false, + stats->heap_tuples); index_update_stats(indexRelation, - false, + false, false, stats->index_tuples); /* Make the updated catalog row versions visible */ @@ -3607,10 +3714,9 @@ IndexGetRelation(Oid indexId, bool missing_ok) void reindex_index(const ReindexStmt *stmt, Oid indexId, bool skip_constraint_checks, char persistence, - const ReindexParams *params) + const ReindexParams *params, Relation heapRelation) { - Relation iRel, - heapRelation; + Relation iRel; Oid heapId; Oid save_userid; int save_sec_context; @@ -3620,27 +3726,44 @@ reindex_index(const ReindexStmt *stmt, Oid indexId, PGRUsage ru0; bool progress = ((params->options & REINDEXOPT_REPORT_PROGRESS) != 0); bool set_tablespace = false; + bool close_rel = false; pg_rusage_init(&ru0); /* - * Open and lock the parent heap relation. ShareLock is sufficient since - * we only need to be sure no schema or data changes are going on.
+ * Open and lock the parent heap relation if not done by caller. ShareLock + * is sufficient since we only need to be sure no schema or data changes + * are going on. */ - heapId = IndexGetRelation(indexId, - (params->options & REINDEXOPT_MISSING_OK) != 0); - /* if relation is missing, leave */ - if (!OidIsValid(heapId)) - return; + if (!heapRelation) + { + heapId = IndexGetRelation(indexId, + (params->options & REINDEXOPT_MISSING_OK) != 0); + /* if relation is missing, leave */ + if (!OidIsValid(heapId)) + return; + + if ((params->options & REINDEXOPT_MISSING_OK) != 0) + heapRelation = try_table_open(heapId, ShareLock); + else + heapRelation = table_open(heapId, ShareLock); - if ((params->options & REINDEXOPT_MISSING_OK) != 0) - heapRelation = try_table_open(heapId, ShareLock); + /* if relation is gone, leave */ + if (!heapRelation) + return; + close_rel = true; + } else - heapRelation = table_open(heapId, ShareLock); + heapId = RelationGetRelid(heapRelation); - /* if relation is gone, leave */ - if (!heapRelation) - return; + /* + * If we are reindexing the global index then lock all the inheritors + * because we are going to access all the inheritors for building the + * global index. ShareLock is enough to prevent schema modifications. + * We need to lock. + */ + if (get_rel_relkind(indexId) == RELKIND_GLOBAL_INDEX) + (void) find_all_inheritors(heapId, ShareLock, NULL); /* * Switch to the table owner's userid, so that any index functions are run @@ -3903,7 +4026,10 @@ reindex_index(const ReindexStmt *stmt, Oid indexId, /* Close rels, but keep locks */ index_close(iRel, NoLock); - table_close(heapRelation, NoLock); + + /* Do not close the rel if it is passed by the caller. 
*/ + if (close_rel) + table_close(heapRelation, NoLock); if (progress) pgstat_progress_end_command(); @@ -4070,8 +4196,19 @@ reindex_relation(const ReindexStmt *stmt, Oid relid, int flags, continue; } + /* + * Skip global indexes when reindexing individual relations, as the + * caller will handle them separately. This prevents redundant + * reindexing and ensures that global indexes are processed only once. + */ + if (get_rel_relkind(indexOid) == RELKIND_GLOBAL_INDEX) + { + RemoveReindexPending(indexOid); + continue; + } + reindex_index(stmt, indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS), - persistence, params); + persistence, params, NULL); CommandCounterIncrement(); diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index d97d632a7e..f3c9d977e7 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -26,6 +26,7 @@ #include "catalog/dependency.h" #include "catalog/namespace.h" #include "catalog/objectaccess.h" +#include "catalog/partition.h" #include "catalog/pg_authid.h" #include "catalog/pg_collation.h" #include "catalog/pg_conversion.h" diff --git a/src/backend/catalog/objectaddress.c b/src/backend/catalog/objectaddress.c index b63fd57dc0..4a79ade8e4 100644 --- a/src/backend/catalog/objectaddress.c +++ b/src/backend/catalog/objectaddress.c @@ -1355,7 +1355,8 @@ get_relation_by_qualified_name(ObjectType objtype, List *object, { case OBJECT_INDEX: if (relation->rd_rel->relkind != RELKIND_INDEX && - relation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX) + relation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX && + relation->rd_rel->relkind != RELKIND_GLOBAL_INDEX) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not an index", @@ -4137,6 +4138,10 @@ getRelationDescription(StringInfo buffer, Oid relid, bool missing_ok) appendStringInfo(buffer, _("index %s"), relname); break; + case RELKIND_GLOBAL_INDEX: + appendStringInfo(buffer, _("global index %s"), + relname); + break; case 
RELKIND_SEQUENCE: appendStringInfo(buffer, _("sequence %s"), relname); @@ -4713,6 +4718,9 @@ getRelationTypeDescription(StringInfo buffer, Oid relid, int32 objectSubId, case RELKIND_PARTITIONED_INDEX: appendStringInfoString(buffer, "index"); break; + case RELKIND_GLOBAL_INDEX: + appendStringInfoString(buffer, "global index"); + break; case RELKIND_SEQUENCE: appendStringInfoString(buffer, "sequence"); break; @@ -6192,6 +6200,7 @@ get_relkind_objtype(char relkind) return OBJECT_TABLE; case RELKIND_INDEX: case RELKIND_PARTITIONED_INDEX: + case RELKIND_GLOBAL_INDEX: return OBJECT_INDEX; case RELKIND_SEQUENCE: return OBJECT_SEQUENCE; diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 93d72157a4..472a096206 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -60,9 +60,11 @@ get_partition_parent(Oid relid, bool even_if_detached) result = get_partition_parent_worker(catalogRelation, relid, &detach_pending); - if (!OidIsValid(result)) - elog(ERROR, "could not find tuple for parent of relation %u", relid); + { + table_close(catalogRelation, AccessShareLock); + return InvalidOid; + } if (detach_pending && !even_if_detached) elog(ERROR, "relation %u has no parent because it's being detached", diff --git a/src/backend/catalog/pg_class.c b/src/backend/catalog/pg_class.c index 18eecbdfc0..3519ed9ae0 100644 --- a/src/backend/catalog/pg_class.c +++ b/src/backend/catalog/pg_class.c @@ -45,6 +45,8 @@ errdetail_relkind_not_supported(char relkind) return errdetail("This operation is not supported for partitioned tables."); case RELKIND_PARTITIONED_INDEX: return errdetail("This operation is not supported for partitioned indexes."); + case RELKIND_GLOBAL_INDEX: + return errdetail("This operation is not supported for global indexes."); default: elog(ERROR, "unrecognized relkind: '%c'", relkind); return 0; diff --git a/src/backend/catalog/pg_index_partitions.c b/src/backend/catalog/pg_index_partitions.c index 
e637feb453..c03fcd45e5 100644 --- a/src/backend/catalog/pg_index_partitions.c +++ b/src/backend/catalog/pg_index_partitions.c @@ -254,9 +254,8 @@ IndexGetPartitionReloid(Relation irel, PartitionId partid) * InvalidateIndexPartitionEntries - Invalidate pg_index_partitions entries * * Set reloid as Invalid in pg_index_partitions entries with respect to the - * given reloid. If a valid global indexoids list is given then only - * invalidate the reloid entires which are related to the input global index - * oids. + * given reloid. If a valid reloids list is given then only + * invalidate the reloid entires which are related to the input reloids. */ void InvalidateIndexPartitionEntries(List *reloids, Oid indexoid) @@ -340,3 +339,46 @@ IndexGetNextPartitionID(Relation irel) return partid; } + +/* + * IndexPartitionRelidGetGlobalIndexOids - Get global index OIDs for a reloid + * Returns a list of the indexoid of every pg_index_partitions row whose + * reloid equals the given relation OID; the result is NIL if none match. + */ +List * +IndexPartitionRelidGetGlobalIndexOids(Oid reloid) +{ + Relation catalogRelation; + SysScanDesc scan; + ScanKeyData key; + HeapTuple tuple; + List *globalindexoids = NIL; + + /* + * This is a read-only scan of pg_index_partitions keyed on reloid, so + * AccessShareLock on the catalog is sufficient.
+ */ + catalogRelation = table_open(IndexPartitionsRelationId, AccessShareLock); + + ScanKeyInit(&key, + Anum_pg_index_partitions_reloid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(reloid)); + + scan = systable_beginscan(catalogRelation, IndexPartitionsReloidIndexId, + true, NULL, 1, &key); + + while ((tuple = systable_getnext(scan)) != NULL) + { + Form_pg_index_partitions form = (Form_pg_index_partitions) GETSTRUCT(tuple); + + Assert(form->reloid == reloid); + globalindexoids = lappend_oid(globalindexoids, form->indexoid); + } + + /* Done */ + systable_endscan(scan); + table_close(catalogRelation, AccessShareLock); + + return globalindexoids; +} diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index 874a8fc89a..a534bae291 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -325,7 +325,7 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, BTREE_AM_OID, rel->rd_rel->reltablespace, collationIds, opclassIds, NULL, coloptions, NULL, (Datum) 0, - INDEX_CREATE_IS_PRIMARY, 0, true, true, NULL); + INDEX_CREATE_IS_PRIMARY, 0, true, true, NULL, NIL); table_close(toast_rel, NoLock); diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 7111d5d533..4417e927e6 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -421,19 +421,10 @@ do_analyze_rel(Relation onerel, const VacuumParams params, * an explicit column list in the ANALYZE command, however. * * If we are doing a recursive scan, we don't want to touch the parent's - * indexes at all. If we're processing a partitioned table, we need to - * know if there are any indexes, but we don't want to process them. + * indexes at all. Partitioned table can also have global indexes so we + * need to open indexes for the partitioned table as well.
*/ - if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) - { - List *idxs = RelationGetIndexList(onerel); - - Irel = NULL; - nindexes = 0; - hasindex = idxs != NIL; - list_free(idxs); - } - else if (!inh) + if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE || !inh) { vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel); hasindex = nindexes > 0; @@ -651,24 +642,6 @@ do_analyze_rel(Relation onerel, const VacuumParams params, InvalidMultiXactId, NULL, NULL, in_outer_xact); - - /* Same for indexes */ - for (ind = 0; ind < nindexes; ind++) - { - AnlIndexData *thisdata = &indexdata[ind]; - double totalindexrows; - - totalindexrows = ceil(thisdata->tupleFract * totalrows); - vac_update_relstats(Irel[ind], - RelationGetNumberOfBlocks(Irel[ind]), - totalindexrows, - 0, 0, - false, - InvalidTransactionId, - InvalidMultiXactId, - NULL, NULL, - in_outer_xact); - } } else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { @@ -684,6 +657,28 @@ do_analyze_rel(Relation onerel, const VacuumParams params, in_outer_xact); } + /* Same for indexes */ + for (ind = 0; ind < nindexes; ind++) + { + AnlIndexData *thisdata = &indexdata[ind]; + double totalindexrows; + + /* Nothing to be done for the partitioned indexes. */ + if (Irel[ind]->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + continue; + + totalindexrows = ceil(thisdata->tupleFract * totalrows); + vac_update_relstats(Irel[ind], + RelationGetNumberOfBlocks(Irel[ind]), + totalindexrows, + 0, 0, + false, + InvalidTransactionId, + InvalidMultiXactId, + NULL, NULL, + in_outer_xact); + } + /* * Now report ANALYZE to the cumulative stats system. For regular tables, * we do it only if not doing inherited stats. 
For partitioned tables, we diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index b55221d44c..692f441851 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -189,6 +189,11 @@ cluster(ParseState *pstate, ClusterStmt *stmt, bool isTopLevel) (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("index \"%s\" for table \"%s\" does not exist", stmt->indexname, stmt->relation->relname))); + if (get_rel_relkind(indexOid) == RELKIND_GLOBAL_INDEX) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("can not cluster using global index \"%s\" ", + stmt->indexname))); } /* For non-partitioned tables, do what we came here to do. */ diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 6f753ab6d7..0bfd2e7dbf 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -24,6 +24,7 @@ #include "access/tableam.h" #include "access/xact.h" #include "catalog/catalog.h" +#include "catalog/heap.h" #include "catalog/index.h" #include "catalog/indexing.h" #include "catalog/namespace.h" @@ -32,6 +33,7 @@ #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" #include "catalog/pg_database.h" +#include "catalog/pg_index_partitions.h" #include "catalog/pg_inherits.h" #include "catalog/pg_namespace.h" #include "catalog/pg_opclass.h" @@ -95,7 +97,7 @@ static void ComputeIndexAttrs(IndexInfo *indexInfo, int *ddl_save_nestlevel); static char *ChooseIndexName(const char *tabname, Oid namespaceId, const List *colnames, const List *exclusionOpNames, - bool primary, bool isconstraint); + bool primary, bool isconstraint, bool global); static char *ChooseIndexNameAddition(const List *colnames); static List *ChooseIndexColumnNames(const List *indexElems); static void ReindexIndex(const ReindexStmt *stmt, const ReindexParams *params, @@ -109,6 +111,8 @@ static void ReindexMultipleTables(const ReindexStmt *stmt, static void reindex_error_callback(void *arg); static void 
ReindexPartitions(const ReindexStmt *stmt, Oid relid, const ReindexParams *params, bool isTopLevel); +static void ReindexPartitionedRelation(List *reloids, + const ReindexParams *params); static void ReindexMultipleInternal(const ReindexStmt *stmt, const List *relids, const ReindexParams *params); static bool ReindexRelationConcurrently(const ReindexStmt *stmt, @@ -276,7 +280,13 @@ CheckIndexCompatible(Oid oldId, } /* Any change in operator class or collation breaks compatibility. */ - old_natts = indexForm->indnkeyatts; + + /* For global index ignore the partitionID attribute. */ + if (get_rel_relkind(oldId) == RELKIND_GLOBAL_INDEX) + old_natts = indexForm->indnkeyatts - 1; + else + old_natts = indexForm->indnkeyatts; + Assert(old_natts == numberOfAttributes); d = SysCacheGetAttrNotNull(INDEXRELID, tuple, Anum_pg_index_indcollation); @@ -525,6 +535,7 @@ WaitForOlderSnapshots(TransactionId limitXmin, bool progress) * of a partitioned index. * 'parentConstraintId': the OID of the parent constraint; InvalidOid if not * the child of a constraint (only used when recursing) + * 'inheritors' List of all inheritor's OIDs if this is a partitioned relation; * 'total_parts': total number of direct and indirect partitions of relation; * pass -1 if not known or rel is not partitioned. * 'is_alter_table': this is due to an ALTER rather than a CREATE operation. @@ -544,6 +555,7 @@ DefineIndex(Oid tableId, Oid indexRelationId, Oid parentIndexId, Oid parentConstraintId, + List *inheritors, int total_parts, bool is_alter_table, bool check_rights, @@ -636,6 +648,27 @@ DefineIndex(Oid tableId, pgstat_progress_update_param(PROGRESS_CREATEIDX_INDEX_OID, InvalidOid); + /* + * If this is a global index, we must append a partition identifier to + * uniquely identify the heap tuple. Therefore, in this design, we have + * opted to include the partition-id as the last key column. 
+ * + * The rationale behind storing it as the last key column is that in + * various scenarios, we would treat this column as an extended + * index key column. Essentially, each index tuple must be uniquely + * identified. Therefore, if we encounter duplicate keys, we utilize heap + * tid as a tiebreaker. However, for global indexes, relying solely on + * heap tid isn't adequate; we also require the partition identifier. + */ + if (stmt->global) + { + IndexElem *newparam = makeNode(IndexElem); + + newparam->name = NULL; + newparam->expr = NULL; + stmt->indexParams = lappend(stmt->indexParams, newparam); + } + /* * count key attributes in index */ @@ -738,6 +771,11 @@ DefineIndex(Oid tableId, errmsg("cannot create index on partitioned table \"%s\" concurrently", RelationGetRelationName(rel)))); } + else if (stmt->global) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create global index on non partitioned table \"%s\"", + RelationGetRelationName(rel)))); /* * Don't try to CREATE INDEX on temp tables of other backends. @@ -832,7 +870,8 @@ DefineIndex(Oid tableId, indexColNames, stmt->excludeOpNames, stmt->primary, - stmt->isconstraint); + stmt->isconstraint, + stmt->global); /* * look up the access method, verify it can handle the requested features @@ -891,6 +930,11 @@ DefineIndex(Oid tableId, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("access method \"%s\" does not support WITHOUT OVERLAPS constraints", accessMethodName))); + if (stmt->global && strcmp(accessMethodName, "btree") != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("access method \"%s\" does not support global indexes", + accessMethodName))); amcanorder = amRoutine->amcanorder; amoptions = amRoutine->amoptions; @@ -957,10 +1001,9 @@ DefineIndex(Oid tableId, * violate uniqueness by putting values that ought to be unique in * different partitions. 
* - * We could lift this limitation if we had global indexes, but those have - * their own problems, so this is a useful feature combination. + * If we are creating a global index then we do not have this problem. */ - if (partitioned && (stmt->unique || exclusion)) + if (partitioned && !stmt->global && (stmt->unique || exclusion)) { PartitionKey key = RelationGetPartitionKey(rel); const char *constraint_type; @@ -1110,7 +1153,7 @@ DefineIndex(Oid tableId, { AttrNumber attno = indexInfo->ii_IndexAttrNumbers[i]; - if (attno < 0) + if (attno < 0 && !stmt->global) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("index creation on system columns is not supported"))); @@ -1212,16 +1255,18 @@ DefineIndex(Oid tableId, flags = constr_flags = 0; if (stmt->isconstraint) flags |= INDEX_CREATE_ADD_CONSTRAINT; - if (skip_build || concurrent || partitioned) + if (skip_build || concurrent || (partitioned && !stmt->global)) flags |= INDEX_CREATE_SKIP_BUILD; if (stmt->if_not_exists) flags |= INDEX_CREATE_IF_NOT_EXISTS; if (concurrent) flags |= INDEX_CREATE_CONCURRENT; - if (partitioned) + if (partitioned && !stmt->global) flags |= INDEX_CREATE_PARTITIONED; if (stmt->primary) flags |= INDEX_CREATE_IS_PRIMARY; + if (stmt->global) + flags |= INDEX_CREATE_GLOBAL; /* * If the table is partitioned, and recursion was declined but partitions @@ -1251,7 +1296,7 @@ DefineIndex(Oid tableId, coloptions, NULL, reloptions, flags, constr_flags, allowSystemTableMods, !check_rights, - &createdConstraintId); + &createdConstraintId, inheritors); ObjectAddressSet(address, RelationRelationId, indexRelationId); @@ -1289,7 +1334,13 @@ DefineIndex(Oid tableId, CreateComments(indexRelationId, RelationRelationId, 0, stmt->idxcomment); - if (partitioned) + /* + * If table is partitioned then create index on each partition.
But if + * we are building a global index we don't need to create it on each + * partition, there will be just one global index which will hold data from + * all the children. + */ + if (partitioned && !stmt->global) { PartitionDesc partdesc; @@ -1523,6 +1574,7 @@ DefineIndex(Oid tableId, InvalidOid, /* no predefined OID */ indexRelationId, /* this is our child */ createdConstraintId, + NIL, -1, is_alter_table, check_rights, check_not_in_use, @@ -1935,9 +1987,21 @@ ComputeIndexAttrs(IndexInfo *indexInfo, Oid attcollation; /* - * Process the column-or-expression to be indexed. + * Process the column-or-expression to be indexed. For partition ID + * attribute both name and expr is set as NULL. And we can directly + * point to the predefine FormData_pg_attribute for the partition id + * attribute. */ - if (attribute->name != NULL) + if ((attribute->name == NULL) && (attribute->expr == NULL)) + { + const FormData_pg_attribute *attform; + + attform = &partitionid_attr; + indexInfo->ii_IndexAttrNumbers[attn] = attform->attnum; + atttype = attform->atttypid; + attcollation = attform->attcollation; + } + else if (attribute->name != NULL) { /* Simple index attribute */ HeapTuple atttuple; @@ -2673,7 +2737,7 @@ ChooseRelationName(const char *name1, const char *name2, static char * ChooseIndexName(const char *tabname, Oid namespaceId, const List *colnames, const List *exclusionOpNames, - bool primary, bool isconstraint) + bool primary, bool isconstraint, bool global) { char *indexname; @@ -2737,6 +2801,9 @@ ChooseIndexNameAddition(const List *colnames) { const char *name = (const char *) lfirst(lc); + if (strcmp(name, "partid") == 0) + continue; + if (buflen > 0) buf[buflen++] = '_'; /* insert _ between names */ @@ -2778,8 +2845,10 @@ ChooseIndexColumnNames(const List *indexElems) origname = ielem->indexcolname; /* caller-specified name */ else if (ielem->name) origname = ielem->name; /* simple column reference */ - else + else if (ielem->expr) origname = "expr"; /* default 
name for expression */ + else + origname = "partid"; /* If it conflicts with any previous column, tweak it */ curname = origname; @@ -2960,7 +3029,7 @@ ReindexIndex(const ReindexStmt *stmt, const ReindexParams *params, bool isTopLev ReindexParams newparams = *params; newparams.options |= REINDEXOPT_REPORT_PROGRESS; - reindex_index(stmt, indOid, false, persistence, &newparams); + reindex_index(stmt, indOid, false, persistence, &newparams, NULL); } } @@ -3010,7 +3079,8 @@ RangeVarCallbackForReindexIndex(const RangeVar *relation, if (!relkind) return; if (relkind != RELKIND_INDEX && - relkind != RELKIND_PARTITIONED_INDEX) + relkind != RELKIND_PARTITIONED_INDEX && + relkind != RELKIND_GLOBAL_INDEX) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not an index", relation->relname))); @@ -3353,6 +3423,7 @@ ReindexPartitions(const ReindexStmt *stmt, Oid relid, const ReindexParams *param char *relnamespace = get_namespace_name(get_rel_namespace(relid)); MemoryContext reindex_context; List *inhoids; + List *parentreloids = NIL; ListCell *lc; ErrorContextCallback errcallback; ReindexErrorInfo errinfo; @@ -3403,10 +3474,16 @@ ReindexPartitions(const ReindexStmt *stmt, Oid relid, const ReindexParams *param /* * This discards partitioned tables, partitioned indexes and foreign - * tables. + * tables. However, we remember the OIDs of the partitioned + * tables and reindex them later as they can also have global indexes. */ if (!RELKIND_HAS_STORAGE(partkind)) + { + if (partkind == RELKIND_PARTITIONED_TABLE) + parentreloids = lappend_oid(parentreloids, partoid); + continue; + } Assert(partkind == RELKIND_INDEX || partkind == RELKIND_RELATION); @@ -3423,6 +3500,9 @@ ReindexPartitions(const ReindexStmt *stmt, Oid relid, const ReindexParams *param */ ReindexMultipleInternal(stmt, partitions, params); + /* Reindex the global indexes.
*/ + ReindexPartitionedRelation(parentreloids, params); + /* * Clean up working storage --- note we must do this after * StartTransactionCommand, else we might be trying to delete the active @@ -3431,6 +3511,78 @@ ReindexPartitions(const ReindexStmt *stmt, Oid relid, const ReindexParams *param MemoryContextDelete(reindex_context); } +/* + * ReindexPartitionedRelation + * + * Reindex the list of partitioned relations. Partitioned relations can have + * global index so this will reindex global indexes directly defined on each + * partitioned relation. + */ +static void +ReindexPartitionedRelation(List *reloids, const ReindexParams *params) +{ + Relation rel; + + foreach_oid(relid, reloids) + { + List *indexoids; + + /* + * Open and lock the relation. ShareLock is sufficient since we only + * need to prevent schema and data changes in it. The lock level used + * here should match ReindexTable(). + */ + if ((params->options & REINDEXOPT_MISSING_OK) != 0) + rel = try_table_open(relid, ShareLock); + else + rel = table_open(relid, ShareLock); + + /* if relation is gone, leave */ + if (!rel) + continue; + + /* Only partitioned table must get here. */ + Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); + + /* If relation doesn't have global index then skip it. */ + if (!rel->rd_rel->relhasglobalindex) + { + /* Close rel, but continue to hold the lock. */ + table_close(rel, NoLock); + continue; + } + + /* + * Get the list of indexes of the relation. Loop through all the + * indexes and reindex the indexes directly defined on the relation. + */ + indexoids = RelationGetIndexList(rel); + foreach_oid(indexoid, indexoids) + { + Oid heapId = IndexGetRelation(indexoid, + (params->options & + REINDEXOPT_MISSING_OK) != 0); + ReindexParams newparams = *params; + + /* + * if relation is missing, or the index is not defined on this + * relation directly then skip it as we do not want to reindex the + * global indexes defined on the parent relation. 
+ */ + if (!OidIsValid(heapId) || heapId != relid) + continue; + + /* Partitioned relation can have only global indexes. */ + Assert(get_rel_relkind(indexoid) == RELKIND_GLOBAL_INDEX); + reindex_index(NULL, indexoid, false, rel->rd_rel->relpersistence, + &newparams, rel); + } + + /* Close rel, but continue to hold the lock. */ + table_close(rel, NoLock); + } +} + /* * ReindexMultipleInternal * @@ -3493,7 +3645,8 @@ ReindexMultipleInternal(const ReindexStmt *stmt, const List *relids, const Reind Assert(!RELKIND_HAS_PARTITIONS(relkind)); if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 && - relpersistence != RELPERSISTENCE_TEMP) + relpersistence != RELPERSISTENCE_TEMP && + !get_rel_has_globalindex(relid)) { ReindexParams newparams = *params; @@ -3509,7 +3662,7 @@ ReindexMultipleInternal(const ReindexStmt *stmt, const List *relids, const Reind newparams.options |= REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK; - reindex_index(stmt, relid, false, relpersistence, &newparams); + reindex_index(stmt, relid, false, relpersistence, &newparams, NULL); PopActiveSnapshot(); /* reindex_index() does the verbose output */ } @@ -3838,6 +3991,7 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein case RELKIND_PARTITIONED_TABLE: case RELKIND_PARTITIONED_INDEX: + case RELKIND_GLOBAL_INDEX: default: /* Return error if type of relation is not supported */ ereport(ERROR, @@ -4451,7 +4605,8 @@ IndexSetParentIndex(Relation partitionIdx, Oid parentOid) /* Make sure this is an index */ Assert(partitionIdx->rd_rel->relkind == RELKIND_INDEX || - partitionIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX); + partitionIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX || + partitionIdx->rd_rel->relkind == RELKIND_GLOBAL_INDEX); /* * Scan pg_inherits for rows linking our index to some parent. 
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index b8837f26cb..875163aa28 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -101,6 +101,7 @@ #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/partcache.h" +#include "utils/rel.h" #include "utils/relcache.h" #include "utils/ruleutils.h" #include "utils/snapmgr.h" @@ -207,6 +208,9 @@ typedef struct AlteredTableInfo char *clusterOnIndex; /* index to use for CLUSTER */ List *changedStatisticsOids; /* OIDs of statistics to rebuild */ List *changedStatisticsDefs; /* string definitions of same */ + List *globalindexoids; /* OIDs of the global indexes from ancestors */ + List *partids; /* Partition ids for each global index oids in + globalindexoids */ } AlteredTableInfo; /* Struct describing one new constraint to check in Phase 3 scan */ @@ -307,6 +311,12 @@ static const struct dropmsgstrings dropmsgstringarray[] = { gettext_noop("index \"%s\" does not exist, skipping"), gettext_noop("\"%s\" is not an index"), gettext_noop("Use DROP INDEX to remove an index.")}, + {RELKIND_GLOBAL_INDEX, + ERRCODE_UNDEFINED_OBJECT, + gettext_noop("index \"%s\" does not exist"), + gettext_noop("index \"%s\" does not exist, skipping"), + gettext_noop("\"%s\" is not an index"), + gettext_noop("Use DROP INDEX to remove an index.")}, {'\0', 0, NULL, NULL, NULL, NULL} }; @@ -739,6 +749,7 @@ static List *GetParentedForeignKeyRefs(Relation partition); static void ATDetachCheckNoForeignKeyRefs(Relation partition); static char GetAttributeCompression(Oid atttypid, const char *compression); static char GetAttributeStorage(Oid atttypid, const char *storagemode); +static void LockPartitionsForGlobalIndex(Relation rel, LOCKMODE lockmode); /* ---------------------------------------------------------------- @@ -1277,13 +1288,15 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) { - if 
(idxRel->rd_index->indisunique) + if (idxRel->rd_index->indisunique || + RelationIsGlobalIndex(idxRel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot create foreign partition of partitioned table \"%s\"", RelationGetRelationName(parent)), - errdetail("Table \"%s\" contains indexes that are unique.", - RelationGetRelationName(parent)))); + errdetail("Table \"%s\" contains indexes that are %s.", + RelationGetRelationName(parent), + idxRel->rd_index->indisunique ? "unique" : "global"))); else { index_close(idxRel, AccessShareLock); @@ -1291,6 +1304,26 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, } } + /* + * A global index exists only on the partitioned table on which + * it is created so we don't need to copy it to child relation. + * However we need to attach this partition to the global index + * that will internally assign a partition id and insert mapping + * into pg_index_partition table. And also update the stats that + * relation has an index. + */ + if (RelationIsGlobalIndex(idxRel)) + { + List *inheritor = list_make1_oid(relationId); + + AttachParittionsToGlobalIndex(idxRel, inheritor); + + /* Update the stats that the relation has an index. */ + index_update_stats(rel, true, true, -1.0); + index_close(idxRel, AccessShareLock); + continue; + } + attmap = build_attrmap_by_name(RelationGetDescr(rel), RelationGetDescr(parent), false); @@ -1302,6 +1335,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, InvalidOid, RelationGetRelid(idxRel), constraintOid, + NIL, -1, false, false, false, false, false); @@ -1647,24 +1681,28 @@ RemoveRelations(DropStmt *drop) } /* - * Concurrent index drop cannot be used with partitioned indexes, - * either. + * Concurrent index drop cannot be used with partitioned indexes or + * global indexes.
*/ if ((flags & PERFORM_DELETION_CONCURRENTLY) != 0 && - state.actual_relkind == RELKIND_PARTITIONED_INDEX) + (state.actual_relkind == RELKIND_PARTITIONED_INDEX || + state.actual_relkind == RELKIND_GLOBAL_INDEX)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot drop partitioned index \"%s\" concurrently", - rel->relname))); + errmsg("cannot drop %s index \"%s\" concurrently", + (state.actual_relkind == RELKIND_GLOBAL_INDEX) ? + "global" : "partitioned", rel->relname))); /* - * If we're told to drop a partitioned index, we must acquire lock on - * all the children of its parent partitioned table before proceeding. - * Otherwise we'd try to lock the child index partitions before their - * tables, leading to potential deadlock against other sessions that - * will lock those objects in the other order. + * If we're told to drop a partitioned index or a global index, we must + * acquire lock on all the children of its parent partitioned table + * before proceeding. Otherwise we'd try to lock the child index + * partitions before their tables, leading to potential deadlock + * against other sessions that will lock those objects in the other + * order. 
*/ - if (state.actual_relkind == RELKIND_PARTITIONED_INDEX) + if (state.actual_relkind == RELKIND_PARTITIONED_INDEX || + state.actual_relkind == RELKIND_GLOBAL_INDEX) (void) find_all_inheritors(state.heapOid, state.heap_lockmode, NULL); @@ -1751,6 +1789,8 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, expected_relkind = RELKIND_RELATION; else if (classform->relkind == RELKIND_PARTITIONED_INDEX) expected_relkind = RELKIND_INDEX; + else if (classform->relkind == RELKIND_GLOBAL_INDEX) + expected_relkind = RELKIND_INDEX; else expected_relkind = classform->relkind; @@ -1877,6 +1917,12 @@ ExecuteTruncate(TruncateStmt *stmt) /* open the relation, we already hold a lock on it */ rel = table_open(myrelid, NoLock); + /* + * Lock top level parent of the relation having global index and all + * its inheritors. + */ + LockPartitionsForGlobalIndex(rel, lockmode); + /* * RangeVarGetRelidExtended() has done most checks with its callback, * but other checks with the now-opened Relation remain. @@ -1980,6 +2026,7 @@ ExecuteTruncateGuts(List *explicit_rels, { List *rels; List *seq_relids = NIL; + List *index_oids = NIL; HTAB *ft_htab = NULL; EState *estate; ResultRelInfo *resultRelInfos; @@ -2043,6 +2090,28 @@ ExecuteTruncateGuts(List *explicit_rels, heap_truncate_check_FKs(rels, false); #endif + /* + * Process the list of relations and collect the list of all the index oids. + * This is required so that after truncating all the relations we can + * reindex the global indexes just once. + */ + foreach(cell, rels) + { + Relation rel = (Relation) lfirst(cell); + List *oids; + + if (!rel->rd_rel->relhasglobalindex) + continue; + oids = RelationGetIndexList(rel); + + /* + * We need to use a unique concatenation, as there may be duplicate + * indexes across different partitions. This can happen if multiple + * partitions inherit the same global indexes from a common ancestor.
+ */ + index_oids = list_concat_unique_oid(index_oids, oids); + } + /* * If we are asked to restart sequences, find all the sequences, lock them * (we need AccessExclusiveLock for ResetSequence), and check permissions. @@ -2243,6 +2312,18 @@ ExecuteTruncateGuts(List *explicit_rels, pgstat_count_truncate(rel); } + /* Reindex global indexes */ + foreach_oid(indexoid, index_oids) + { + ReindexParams reindex_params = {0}; + + if (get_rel_relkind(indexoid) != RELKIND_GLOBAL_INDEX) + continue; + + reindex_index(NULL, indexoid, false, get_rel_persistence(indexoid), + &reindex_params, NULL); + } + /* Now go through the hash table, and truncate foreign tables */ if (ft_htab) { @@ -3804,6 +3885,7 @@ renameatt_check(Oid myrelid, Form_pg_class classform, bool recursing) relkind != RELKIND_COMPOSITE_TYPE && relkind != RELKIND_INDEX && relkind != RELKIND_PARTITIONED_INDEX && + relkind != RELKIND_GLOBAL_INDEX && relkind != RELKIND_FOREIGN_TABLE && relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, @@ -4237,7 +4319,8 @@ RenameRelation(RenameStmt *stmt) */ relkind = get_rel_relkind(relid); obj_is_index = (relkind == RELKIND_INDEX || - relkind == RELKIND_PARTITIONED_INDEX); + relkind == RELKIND_PARTITIONED_INDEX || + relkind == RELKIND_GLOBAL_INDEX); if (obj_is_index || is_index_stmt == obj_is_index) break; @@ -4304,7 +4387,8 @@ RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bo */ Assert(!is_index || is_index == (targetrelation->rd_rel->relkind == RELKIND_INDEX || - targetrelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)); + targetrelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX || + targetrelation->rd_rel->relkind == RELKIND_GLOBAL_INDEX)); /* * Update pg_class tuple with new relname. (Scribbling on reltup is OK @@ -4332,7 +4416,8 @@ RenameRelationInternal(Oid myrelid, const char *newrelname, bool is_internal, bo * Also rename the associated constraint, if any. 
*/ if (targetrelation->rd_rel->relkind == RELKIND_INDEX || - targetrelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + targetrelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX || + targetrelation->rd_rel->relkind == RELKIND_GLOBAL_INDEX) { Oid constraintId = get_index_constraint(myrelid); @@ -4417,6 +4502,7 @@ CheckTableNotInUse(Relation rel, const char *stmt) if (rel->rd_rel->relkind != RELKIND_INDEX && rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX && + rel->rd_rel->relkind != RELKIND_GLOBAL_INDEX && AfterTriggerPendingOnRel(RelationGetRelid(rel))) ereport(ERROR, (errcode(ERRCODE_OBJECT_IN_USE), @@ -6038,6 +6124,28 @@ ATRewriteTables(AlterTableStmt *parsetree, List **wqueue, LOCKMODE lockmode, } } + /* + * Rebuild global indexes. Each AlteredTableInfo contains the list of + * global index oids of ancestors which need to be rebuilt after this + * partition got attached. + */ + foreach(ltab, *wqueue) + { + AlteredTableInfo *tab = (AlteredTableInfo *) lfirst(ltab); + ReindexParams reindex_params = {0}; + + if (tab->globalindexoids == NIL) + continue; + + Assert(tab->relkind == RELKIND_PARTITIONED_TABLE); + + foreach_oid(indexoid, tab->globalindexoids) + { + reindex_index(NULL, indexoid, false, + get_rel_persistence(indexoid), &reindex_params, NULL); + } + } + /* * Foreign key constraints are checked in a final pass, since (a) it's * generally best to examine each one separately, and (b) it's at least @@ -6745,6 +6853,7 @@ ATSimplePermissions(AlterTableType cmdtype, Relation rel, int allowed_targets) actual_target = ATT_MATVIEW; break; case RELKIND_INDEX: + case RELKIND_GLOBAL_INDEX: actual_target = ATT_INDEX; break; case RELKIND_PARTITIONED_INDEX: @@ -8888,6 +8997,7 @@ ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newVa */ if (rel->rd_rel->relkind != RELKIND_INDEX && rel->rd_rel->relkind != RELKIND_PARTITIONED_INDEX && + rel->rd_rel->relkind != RELKIND_GLOBAL_INDEX && !colName) ereport(ERROR, 
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), @@ -8967,7 +9077,8 @@ ATExecSetStatistics(Relation rel, const char *colName, int16 colNum, Node *newVa colName))); if (rel->rd_rel->relkind == RELKIND_INDEX || - rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + rel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX || + rel->rd_rel->relkind == RELKIND_GLOBAL_INDEX) { if (attnum > rel->rd_index->indnkeyatts) ereport(ERROR, @@ -9588,6 +9699,7 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel, bool check_rights; bool skip_build; bool quiet; + List *inheritors = NIL; ObjectAddress address; Assert(IsA(stmt, IndexStmt)); @@ -9603,11 +9715,15 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel, /* suppress notices when rebuilding existing index */ quiet = is_rebuild; + if (stmt->global) + inheritors = find_all_inheritors(RelationGetRelid(rel), NoLock, NULL); + address = DefineIndex(RelationGetRelid(rel), stmt, InvalidOid, /* no predefined OID */ InvalidOid, /* no parent index */ InvalidOid, /* no parent constraint */ + inheritors, -1, /* total_parts unknown */ true, /* is_alter_table */ check_rights, @@ -9634,6 +9750,9 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel, index_close(irel, NoLock); } + if (inheritors) + list_free(inheritors); + return address; } @@ -15049,7 +15168,8 @@ RememberAllDependentForRebuilding(AlteredTableInfo *tab, AlterTableType subtype, char relKind = get_rel_relkind(foundObject.objectId); if (relKind == RELKIND_INDEX || - relKind == RELKIND_PARTITIONED_INDEX) + relKind == RELKIND_PARTITIONED_INDEX || + relKind == RELKIND_GLOBAL_INDEX) { Assert(foundObject.objectSubId == 0); RememberIndexForRebuilding(foundObject.objectId, tab); @@ -16197,6 +16317,7 @@ ATExecChangeOwner(Oid relationOid, Oid newOwnerId, bool recursing, LOCKMODE lock if (tuple_class->relkind != RELKIND_COMPOSITE_TYPE && tuple_class->relkind != RELKIND_INDEX && tuple_class->relkind != RELKIND_PARTITIONED_INDEX && + tuple_class->relkind != RELKIND_GLOBAL_INDEX && tuple_class->relkind 
!= RELKIND_TOASTVALUE) changeDependencyOnOwner(RelationRelationId, relationOid, newOwnerId); @@ -16648,6 +16769,7 @@ ATExecSetRelOptions(Relation rel, List *defList, AlterTableType operation, break; case RELKIND_INDEX: case RELKIND_PARTITIONED_INDEX: + case RELKIND_GLOBAL_INDEX: (void) index_reloptions(rel->rd_indam->amoptions, newOptions, true); break; case RELKIND_TOASTVALUE: @@ -16839,7 +16961,8 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace, LOCKMODE lockmode) newrlocator.spcOid = newTableSpace; /* hand off to AM to actually create new rel storage and copy the data */ - if (rel->rd_rel->relkind == RELKIND_INDEX) + if (rel->rd_rel->relkind == RELKIND_INDEX || + rel->rd_rel->relkind == RELKIND_GLOBAL_INDEX) { index_copy_data(rel, newrlocator); } @@ -17022,7 +17145,8 @@ AlterTableMoveAll(AlterTableMoveAllStmt *stmt) relForm->relkind != RELKIND_PARTITIONED_TABLE) || (stmt->objtype == OBJECT_INDEX && relForm->relkind != RELKIND_INDEX && - relForm->relkind != RELKIND_PARTITIONED_INDEX) || + relForm->relkind != RELKIND_PARTITIONED_INDEX && + relForm->relkind != RELKIND_GLOBAL_INDEX) || (stmt->objtype == OBJECT_MATVIEW && relForm->relkind != RELKIND_MATVIEW)) continue; @@ -19612,8 +19736,8 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, errmsg("\"%s\" is not a composite type", rv->relname))); if (reltype == OBJECT_INDEX && relkind != RELKIND_INDEX && - relkind != RELKIND_PARTITIONED_INDEX - && !IsA(stmt, RenameStmt)) + relkind != RELKIND_PARTITIONED_INDEX && + relkind != RELKIND_GLOBAL_INDEX && !IsA(stmt, RenameStmt)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not an index", rv->relname))); @@ -19636,7 +19760,8 @@ RangeVarCallbackForAlterRelation(const RangeVar *rv, Oid relid, Oid oldrelid, */ if (IsA(stmt, AlterObjectSchemaStmt)) { - if (relkind == RELKIND_INDEX || relkind == RELKIND_PARTITIONED_INDEX) + if (relkind == RELKIND_INDEX || relkind == RELKIND_PARTITIONED_INDEX || + relkind == 
RELKIND_GLOBAL_INDEX) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot change schema of index \"%s\"", @@ -20174,6 +20299,160 @@ QueuePartitionConstraintValidation(List **wqueue, Relation scanrel, } } +/* + * AttachParittionsToGlobalIndex - Attach relation oids to the global index + * + * This will create the mapping for the input 'reloids' to global index oid of + * the input relation pointed by 'irel'. + */ +void +AttachParittionsToGlobalIndex(Relation irel, List *reloids) +{ + /* + * Loop through OID of each relation and attach to the global indexes. + */ + foreach_oid(childoid, reloids) + { + /* Caller should be holding the lock for all the children. */ + Relation childrel = table_open(childoid, NoLock); + PartitionId partid; + + /* + * Allocate the partition ID for this partition with respect to the + * global index and insert the mapping into the index partition + * table. + */ + partid = IndexGetNextPartitionID(irel); + InsertIndexPartitionEntry(irel, childoid, partid); + + table_close(childrel, NoLock); + } +} + +/* + * AttachToGlobalIndexes - Attach base relation(s) to ancestor's global indexes + * + * This function creates the mapping for the attached relation to all the + * global indexes present on the partitioned table we are attaching to, as well + * as all its ancestors. The process involves assigning a partition ID for each + * global index on the ancestors and making an entry in the pg_index_partitions + * table. If the relation being attached is also partitioned, the function + * recursively traverses all its children to create mappings for the base + * relations. Note that this mapping is required only for the base relations, + * as they are the ones that can contain tuples.
+ */ +static void +AttachToGlobalIndexes(List **wqueue, Relation rel, List *reloids) +{ + List *indexoids; + List *globalindexoids = NIL; + bool hasglobalindex = false; + AlteredTableInfo *tab; + + /* + * Retrieve the list of all indexes from the parent to which we are + * attaching. The parent relation's index list will also include all the + * global indexes of its ancestors. + */ + indexoids = RelationGetIndexList(rel); + + /* Quick exit if there is no index on the parent relation. */ + if (indexoids == NIL) + return; + + /* + * Loop through each indexoid and create mapping for all the reloids + * if this is a global index. + */ + foreach_oid(indexoid, indexoids) + { + Relation irel = index_open(indexoid, RowExclusiveLock); + + /* We don't need to do anything if this is not a global index. */ + if (!RelationIsGlobalIndex(irel)) + { + table_close(irel, RowExclusiveLock); + continue; + } + + globalindexoids = lappend_oid(globalindexoids, indexoid); + + /* Flag to indicate that we have at least one global index. */ + hasglobalindex = true; + + /* Attach reloids to the global index. */ + AttachParittionsToGlobalIndex(irel, reloids); + + /* + * Invalidate the index relation cache of the global index so that it + * gets recreated and the newly attached partitions get reflected in + * the cache. + */ + CacheInvalidateRelcache(irel); + + /* Close the index relation, keep the lock till end of transaction */ + table_close(irel, NoLock); + } + + /* + * Loop through each partition and update the stats that the relation has + * an index. + */ + if (hasglobalindex) + { + foreach_oid(childoid, reloids) + { + Relation childrel; + + /* Lock already held by caller. */ + childrel = table_open(childoid, NoLock); + index_update_stats(childrel, true, true, -1.0); + table_close(childrel, NoLock); + } + } + + tab = ATGetQueueEntry(wqueue, rel); + tab->globalindexoids = globalindexoids; + + /* Free the indexoids list memory.
*/ + list_free(indexoids); +} + +/* + * DetachFromGlobalIndexes - Detach reloids from all global indexes + * + * Invalidate the mapping in pg_index_partitions for input 'indexoids' and + * 'reloids'. + */ +void +DetachFromGlobalIndexes(List *indexoids, List *reloids) +{ + foreach_oid(indexoid, indexoids) + { + Relation irel = index_open(indexoid, AccessExclusiveLock); + + /* + * There will not be any mapping if this is not a global index so + * continue with the next entry. + */ + if (!RelationIsGlobalIndex(irel)) + { + index_close(irel, AccessExclusiveLock); + continue; + } + + /* Invalidate the mapping for the global index to reloids. */ + InvalidateIndexPartitionEntries(reloids, indexoid); + + /* + * Invalidate the index relation cache so that the dropped relation + * information is reflected in the cache. + */ + CacheInvalidateRelcache(irel); + index_close(irel, AccessExclusiveLock); + } +} + /* * ALTER TABLE ATTACH PARTITION FOR VALUES * @@ -20200,6 +20479,12 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd, pstate->p_sourcetext = context->queryString; + /* + * If the relation to which we are attaching is marked as having a global + * index, lock all the inheritors which are covered by the global index. + */ + LockPartitionsForGlobalIndex(rel, AccessExclusiveLock); + /* * We must lock the default partition if one exists, because attaching a * new partition will change its partition constraint. @@ -20472,6 +20757,9 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd, ObjectAddressSet(address, RelationRelationId, RelationGetRelid(attachrel)); + /* Attach partition to the global indexes.
*/ + AttachToGlobalIndexes(wqueue, rel, attachrel_children); + /* * If the partition we just attached is partitioned itself, invalidate * relcache for all descendent partitions too to ensure that their @@ -20546,14 +20834,16 @@ AttachPartitionEnsureIndexes(List **wqueue, Relation rel, Relation attachrel) Relation idxRel = index_open(idx, AccessShareLock); if (idxRel->rd_index->indisunique || - idxRel->rd_index->indisprimary) + idxRel->rd_index->indisprimary || + RelationIsGlobalIndex(idxRel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot attach foreign table \"%s\" as partition of partitioned table \"%s\"", RelationGetRelationName(attachrel), RelationGetRelationName(rel)), - errdetail("Partitioned table \"%s\" contains unique indexes.", - RelationGetRelationName(rel)))); + errdetail("Partitioned table \"%s\" contains %s indexes.", + RelationGetRelationName(rel), + RelationIsGlobalIndex(idxRel) ? "global" : "unique"))); index_close(idxRel, AccessShareLock); } @@ -20664,6 +20954,7 @@ AttachPartitionEnsureIndexes(List **wqueue, Relation rel, Relation attachrel) DefineIndex(RelationGetRelid(attachrel), stmt, InvalidOid, RelationGetRelid(idxRel), conOid, + NIL, -1, true, false, false, false, false); } @@ -20879,6 +21170,15 @@ ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel, LockRelationOid(defaultPartOid, AccessExclusiveLock); } + /* + * Lock top level parent of the relation having global index and all its + * inheritors. + * + * XXX do we need AccessExclusiveLock on all the tables under the global + * index or only on the partitions which are being detached? + */ + LockPartitionsForGlobalIndex(rel, AccessExclusiveLock); + /* * In concurrent mode, the partition is locked with share-update-exclusive * in the first transaction.
This allows concurrent transactions to be @@ -21035,6 +21335,7 @@ DetachPartitionFinalize(Relation rel, Relation partRel, bool concurrent, newtuple; Relation trigrel = NULL; List *fkoids = NIL; + List *children = NIL; if (concurrent) { @@ -21322,6 +21623,30 @@ DetachPartitionFinalize(Relation rel, Relation partRel, bool concurrent, CacheInvalidateRelcacheByRelid(defaultPartOid); } + /* + * When detaching a table, we also need to detach (invalidate mapping in + * pg_index_partition relation) this table from all the global indexes + * on the parent table and its ancestors from which the table is being + * detached. If the table being detached is itself partitioned, we must + * retrieve the list of all its inheritors and detach all the leaf tables + * under this partition from the global indexes of all ancestors. + */ + indexes = RelationGetIndexList(rel); + if (partRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + /* + * All inheritors are already locked so we don't need to lock it here. + */ + children = find_all_inheritors(RelationGetRelid(partRel), + NoLock, NULL); + } + else + children = list_make1_oid(RelationGetRelid(partRel)); + + /* Detach the relation and its children from ancestor's global indexes. */ + DetachFromGlobalIndexes(indexes, children); + list_free(indexes); + /* * Invalidate the parent's relcache so that the partition is no longer * included in its partition descriptor. 
@@ -21337,15 +21662,13 @@ DetachPartitionFinalize(Relation rel, Relation partRel, bool concurrent, */ if (partRel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { - List *children; - - children = find_all_inheritors(RelationGetRelid(partRel), - AccessExclusiveLock, NULL); foreach(cell, children) { CacheInvalidateRelcacheByRelid(lfirst_oid(cell)); } } + + list_free(children); } /* @@ -21541,7 +21864,8 @@ RangeVarCallbackForAttachIndex(const RangeVar *rv, Oid relOid, Oid oldRelOid, return; /* concurrently dropped, so nothing to do */ classform = (Form_pg_class) GETSTRUCT(tuple); if (classform->relkind != RELKIND_PARTITIONED_INDEX && - classform->relkind != RELKIND_INDEX) + classform->relkind != RELKIND_INDEX && + classform->relkind != RELKIND_GLOBAL_INDEX) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("\"%s\" is not an index", rv->relname))); @@ -22040,3 +22364,45 @@ GetAttributeStorage(Oid atttypid, const char *storagemode) return cstorage; } + +/* + * Lock top level parent of the relation having global index and all its + * inheritos. + */ +static void +LockPartitionsForGlobalIndex(Relation rel, LOCKMODE lockmode) +{ + List *indexoidlist; + List *parentreloids = NIL; + + /* Nothing to do if relhasglobalindex is false. */ + if (!rel->rd_rel->relhasglobalindex) + return; + + /* + * Loop the all the indexes and for each global index get the relation oid + * on which the global index is defined and lock that parent along with all + * its inheritors. 
+ */ + indexoidlist = RelationGetIndexList(rel); + foreach_oid(indexid, indexoidlist) + { + Relation idxrel; + Oid reloid; + + idxrel = index_open(indexid, AccessShareLock); + if (!RelationIsGlobalIndex(idxrel)) + { + index_close(idxrel, AccessShareLock); + continue; + } + reloid = idxrel->rd_index->indrelid; + index_close(idxrel, AccessShareLock); + if (list_member_oid(parentreloids, reloid)) + continue; + + parentreloids = lappend_oid(parentreloids, reloid); + LockRelationOid(reloid, lockmode); + (void) find_all_inheritors(reloid, lockmode, NULL); + } +} diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 733ef40ae7..435ac0d850 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -35,6 +35,7 @@ #include "access/transam.h" #include "access/xact.h" #include "catalog/namespace.h" +#include "catalog/partition.h" #include "catalog/pg_database.h" #include "catalog/pg_inherits.h" #include "commands/cluster.h" @@ -59,6 +60,7 @@ #include "utils/injection_point.h" #include "utils/memutils.h" #include "utils/snapmgr.h" +#include "utils/lsyscache.h" #include "utils/syscache.h" /* @@ -2365,6 +2367,10 @@ vac_open_indexes(Relation relation, LOCKMODE lockmode, Assert(lockmode != NoLock); + /* + * Get list of all the indexes including the global indexes of all its + * ancestors. 
+ */ indexoidlist = RelationGetIndexList(relation); /* allocate enough memory for all indexes */ @@ -2383,7 +2389,8 @@ vac_open_indexes(Relation relation, LOCKMODE lockmode, Relation indrel; indrel = index_open(indexoid, lockmode); - if (indrel->rd_index->indisready) + if (indrel->rd_index->indisready && + indrel->rd_index->indrelid == RelationGetRelid(relation)) (*Irel)[i++] = indrel; else index_close(indrel, lockmode); diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index ca33a85427..b543a5d683 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -107,13 +107,16 @@ #include "postgres.h" #include "access/genam.h" +#include "access/relation.h" #include "access/relscan.h" #include "access/tableam.h" #include "access/xact.h" #include "catalog/index.h" +#include "catalog/partition.h" #include "executor/executor.h" #include "nodes/nodeFuncs.h" #include "storage/lmgr.h" +#include "utils/lsyscache.h" #include "utils/multirangetypes.h" #include "utils/rangetypes.h" #include "utils/snapmgr.h" @@ -174,9 +177,11 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative) return; /* - * Get cached list of index OIDs + * Get list of all the indexes including the global indexes of all its + * ancestors. */ indexoidlist = RelationGetIndexList(resultRelation); + len = list_length(indexoidlist); if (len == 0) return; @@ -213,6 +218,14 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative) /* extract index key information from the index's pg_index info */ ii = BuildIndexInfo(indexDesc); + /* + * Fetch partition ID of the relation for the global index. For more + * details refer comments atop IndexInfo. + */ + if (RelationIsGlobalIndex(indexDesc)) + ii->ii_partid = IndexGetRelationPartitionId(indexDesc, + RelationGetRelid(resultRelation)); + /* * If the indexes are to be used for speculative insertion, add extra * information required by unique index entries. 
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 59233b6473..c716f9a6fe 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -268,6 +268,15 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, continue; } + /* + * TODO: Global index scan paths are not yet supported. + */ + if (RelationIsGlobalIndex(indexRelation)) + { + index_close(indexRelation, NoLock); + continue; + } + /* * If the index is valid, but cannot yet be used, ignore it; but * mark the plan we are generating as transient. See diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 50f53159d5..1c8db4fde9 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -487,7 +487,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type unicode_normal_form %type opt_instead -%type opt_unique opt_verbose opt_full +%type opt_unique opt_verbose opt_full opt_global %type opt_freeze opt_analyze opt_default %type opt_binary copy_delimiter @@ -8188,7 +8188,7 @@ defacl_privilege_target: IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_single_name ON relation_expr access_method_clause '(' index_params ')' - opt_include opt_unique_null_treatment opt_reloptions OptTableSpace where_clause + opt_include opt_unique_null_treatment opt_reloptions opt_global OptTableSpace where_clause { IndexStmt *n = makeNode(IndexStmt); @@ -8201,8 +8201,9 @@ IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_single_name n->indexIncludingParams = $12; n->nulls_not_distinct = !$13; n->options = $14; - n->tableSpace = $15; - n->whereClause = $16; + n->global = $15; + n->tableSpace = $16; + n->whereClause = $17; n->excludeOpNames = NIL; n->idxcomment = NULL; n->indexOid = InvalidOid; @@ -8220,7 +8221,7 @@ IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_single_name } | CREATE opt_unique INDEX opt_concurrently IF_P NOT EXISTS name ON relation_expr 
access_method_clause '(' index_params ')' - opt_include opt_unique_null_treatment opt_reloptions OptTableSpace where_clause + opt_include opt_unique_null_treatment opt_reloptions opt_global OptTableSpace where_clause { IndexStmt *n = makeNode(IndexStmt); @@ -8233,8 +8234,9 @@ IndexStmt: CREATE opt_unique INDEX opt_concurrently opt_single_name n->indexIncludingParams = $15; n->nulls_not_distinct = !$16; n->options = $17; - n->tableSpace = $18; - n->whereClause = $19; + n->global = $18; + n->tableSpace = $19; + n->whereClause = $20; n->excludeOpNames = NIL; n->idxcomment = NULL; n->indexOid = InvalidOid; @@ -8257,6 +8259,11 @@ opt_unique: | /*EMPTY*/ { $$ = false; } ; +opt_global: + GLOBAL { $$ = true; } + | /*EMPTY*/ { $$ = false; } + ; + access_method_clause: USING name { $$ = $2; } | /*EMPTY*/ { $$ = DEFAULT_INDEX_TYPE; } diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index afcf54169c..d354f44e66 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -1762,6 +1762,7 @@ generateClonedIndexStmt(RangeVar *heapRel, Relation source_idx, index->unique = idxrec->indisunique; index->nulls_not_distinct = idxrec->indnullsnotdistinct; index->primary = idxrec->indisprimary; + index->global = (idxrelrec->relkind == RELKIND_GLOBAL_INDEX); index->iswithoutoverlaps = (idxrec->indisprimary || idxrec->indisunique) && idxrec->indisexclusion; index->transformed = true; /* don't need transformIndexStmt */ index->concurrent = false; @@ -1880,6 +1881,13 @@ generateClonedIndexStmt(RangeVar *heapRel, Relation source_idx, keyno); int16 opt = source_idx->rd_indoption[keyno]; + /* + * We don't need to copy PartitionIdAttributeNumber as this will be + * internally added by DefineIndex while creating a global index. 
+ */ + if (attnum == PartitionIdAttributeNumber) + continue; + iparam = makeNode(IndexElem); if (AttributeNumberIsValid(attnum)) @@ -2528,6 +2536,8 @@ transformIndexConstraint(Constraint *constraint, CreateStmtContext *cxt) Assert(attnum <= heap_rel->rd_att->natts); attform = TupleDescAttr(heap_rel->rd_att, attnum - 1); } + else if (attnum == PartitionIdAttributeNumber) + continue; else attform = SystemAttributeDefinition(attnum); attname = pstrdup(NameStr(attform->attname)); diff --git a/src/backend/statistics/stat_utils.c b/src/backend/statistics/stat_utils.c index a9a3224efe..8564a19cdb 100644 --- a/src/backend/statistics/stat_utils.c +++ b/src/backend/statistics/stat_utils.c @@ -148,7 +148,12 @@ stats_lock_check_privileges(Oid reloid) */ switch (get_rel_relkind(reloid)) { + /* + * FIXME, revalidate correct lock type for global index and update + * comments in README.tuplock and other relavent places. + */ case RELKIND_INDEX: + case RELKIND_GLOBAL_INDEX: index_oid = reloid; table_oid = IndexGetRelation(index_oid, false); index_lockmode = AccessShareLock; diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 25fe3d5801..a3a84c57f0 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1459,6 +1459,7 @@ ProcessUtilitySlow(ParseState *pstate, LOCKMODE lockmode; int nparts = -1; bool is_alter_table; + List *inheritors = NIL; if (stmt->concurrent) PreventInTransactionBlock(isTopLevel, @@ -1496,7 +1497,6 @@ ProcessUtilitySlow(ParseState *pstate, get_rel_relkind(relid) == RELKIND_PARTITIONED_TABLE) { ListCell *lc; - List *inheritors = NIL; inheritors = find_all_inheritors(relid, lockmode, NULL); foreach(lc, inheritors) @@ -1512,17 +1512,16 @@ ProcessUtilitySlow(ParseState *pstate, relkind, stmt->relation->relname); if (relkind == RELKIND_FOREIGN_TABLE && - (stmt->unique || stmt->primary)) + (stmt->unique || stmt->primary || stmt->global)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("cannot create unique index 
on partitioned table \"%s\"", - stmt->relation->relname), + errmsg("cannot create %s index on partitioned table \"%s\"", + stmt->global ? "global" : "unique", stmt->relation->relname), errdetail("Table \"%s\" contains partitions that are foreign tables.", stmt->relation->relname))); } /* count direct and indirect children, but not rel */ nparts = list_length(inheritors) - 1; - list_free(inheritors); } /* @@ -1547,6 +1546,7 @@ ProcessUtilitySlow(ParseState *pstate, InvalidOid, /* no predefined OID */ InvalidOid, /* no parent index */ InvalidOid, /* no parent constraint */ + inheritors, /* list of inheritor's OID */ nparts, /* # of partitions, or -1 */ is_alter_table, true, /* check_rights */ @@ -1554,6 +1554,8 @@ ProcessUtilitySlow(ParseState *pstate, false, /* skip_build */ false); /* quiet */ + list_free(inheritors); + /* * Add the CREATE INDEX node itself to stash right away; * if there were any commands stashed in the ALTER TABLE diff --git a/src/backend/utils/adt/amutils.c b/src/backend/utils/adt/amutils.c index 0af26d6acf..54b3a0d6b9 100644 --- a/src/backend/utils/adt/amutils.c +++ b/src/backend/utils/adt/amutils.c @@ -173,7 +173,8 @@ indexam_property(FunctionCallInfo fcinfo, PG_RETURN_NULL(); rd_rel = (Form_pg_class) GETSTRUCT(tuple); if (rd_rel->relkind != RELKIND_INDEX && - rd_rel->relkind != RELKIND_PARTITIONED_INDEX) + rd_rel->relkind != RELKIND_PARTITIONED_INDEX && + rd_rel->relkind != RELKIND_GLOBAL_INDEX) { ReleaseSysCache(tuple); PG_RETURN_NULL(); diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 3d6e6bdbfd..92ef04533c 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -1401,6 +1401,7 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, Oid keycoltype; Oid keycolcollation; + /* * Ignore non-key attributes if told to. */ @@ -1414,6 +1415,10 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, sep = ""; } + /* Ignore internal PartitionIdAttributeNumber. 
*/ + if (attnum == PartitionIdAttributeNumber) + continue; + if (!colno) appendStringInfoString(&buf, sep); sep = ", "; @@ -1514,6 +1519,12 @@ pg_get_indexdef_worker(Oid indexrelid, int colno, if (idxrec->indnullsnotdistinct) appendStringInfoString(&buf, " NULLS NOT DISTINCT"); + /* + * If this is a global index, append "GLOBAL" + */ + if (idxrelrec->relkind == RELKIND_GLOBAL_INDEX) + appendStringInfoString(&buf, " GLOBAL"); + /* * If it has options, append "WITH (options)" */ diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index c460a72b75..ac99d8e608 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -2253,6 +2253,27 @@ get_rel_relam(Oid relid) return result; } +/* + * get_rel_has_globalindex + * + * Returns whether the relation has global index or not. + */ +bool +get_rel_has_globalindex(Oid relid) +{ + HeapTuple tp; + Form_pg_class reltup; + char result; + + tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for relation %u", relid); + reltup = (Form_pg_class) GETSTRUCT(tp); + result = reltup->relhasglobalindex; + ReleaseSysCache(tp); + + return result; +} /* ---------- TRANSFORM CACHE ---------- */ diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 559ba9cdb2..485c3fd223 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -33,6 +33,7 @@ #include "access/htup_details.h" #include "access/multixact.h" #include "access/parallel.h" +#include "access/relation.h" #include "access/reloptions.h" #include "access/sysattr.h" #include "access/table.h" @@ -51,6 +52,7 @@ #include "catalog/pg_authid.h" #include "catalog/pg_constraint.h" #include "catalog/pg_database.h" +#include "catalog/pg_inherits.h" #include "catalog/pg_namespace.h" #include "catalog/pg_opclass.h" #include "catalog/pg_proc.h" @@ -487,6 +489,7 @@ RelationParseRelOptions(Relation 
relation, HeapTuple tuple) break; case RELKIND_INDEX: case RELKIND_PARTITIONED_INDEX: + case RELKIND_GLOBAL_INDEX: amoptsfn = relation->rd_indam->amoptions; break; default: @@ -1223,7 +1226,8 @@ retry: * initialize access method information */ if (relation->rd_rel->relkind == RELKIND_INDEX || - relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) + relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX || + relation->rd_rel->relkind == RELKIND_GLOBAL_INDEX) RelationInitIndexAccessInfo(relation); else if (RELKIND_HAS_TABLE_AM(relation->rd_rel->relkind) || relation->rd_rel->relkind == RELKIND_SEQUENCE) @@ -1588,6 +1592,14 @@ RelationInitIndexAccessInfo(Relation relation) (void) RelationGetIndexAttOptions(relation, false); + /* + * If this is a global index then also build cache for partition id to + * relation oid mapping. For more details about this mapping read comments + * atop IndexPartitionInfoData. + */ + if (RelationIsGlobalIndex(relation)) + BuildIndexPartitionInfo(relation, indexcxt); + /* * expressions, predicate, exclusion caches will be filled later */ @@ -2281,7 +2293,8 @@ RelationReloadIndexInfo(Relation relation) /* Should be called only for invalidated, live indexes */ Assert((relation->rd_rel->relkind == RELKIND_INDEX || - relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX) && + relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX || + relation->rd_rel->relkind == RELKIND_GLOBAL_INDEX) && !relation->rd_isvalid && relation->rd_droppedSubid == InvalidSubTransactionId); @@ -4926,6 +4939,18 @@ RelationGetIndexList(Relation relation) systable_endscan(indscan); + /* + * If this relation potentially has global indexes on itself or on any of + * its ancestors, retrieve the list of all global indexes. 
+ */ + if (RelationGetForm(relation)->relhasglobalindex) + { + List *globalindexoids = IndexPartitionRelidGetGlobalIndexOids( + RelationGetRelid(relation)); + + result = list_concat_unique_oid(result, globalindexoids); + } + table_close(indrel, AccessShareLock); /* Sort the result list into OID order, per API spec. */ @@ -5339,7 +5364,8 @@ RelationGetIndexAttrBitmap(Relation relation, IndexAttrBitmapKind attrKind) return NULL; /* - * Get cached list of index OIDs. If we have to start over, we do so here. + * Get list of all the indexes including the global indexes of all its + * ancestors. If we have to start over, we do so here. */ restart: indexoidlist = RelationGetIndexList(relation); @@ -5451,8 +5477,11 @@ restart: * Obviously, non-key columns couldn't be referenced by foreign * key or identity key. Hence we do not include them into * uindexattrs, pkindexattrs and idindexattrs bitmaps. + * + * Also ignore the partition ID attribute as this is an internal + * attribute added for the global indexes.
*/ - if (attrnum != 0) + if (attrnum != 0 && attrnum != PartitionIdAttributeNumber) { *attrs = bms_add_member(*attrs, attrnum - FirstLowInvalidHeapAttributeNumber); diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 1937997ea6..7d378facef 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -5688,6 +5688,7 @@ binary_upgrade_set_pg_class_oids(Archive *fout, "\n-- For binary upgrade, must preserve pg_class oids and relfilenodes\n"); if (entry->relkind != RELKIND_INDEX && + entry->relkind != RELKIND_GLOBAL_INDEX && entry->relkind != RELKIND_PARTITIONED_INDEX) { appendPQExpBuffer(upgrade_buffer, diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index dd25d2fe7b..778ec2815c 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -1989,7 +1989,12 @@ describeOneTableDetails(const char *schemaname, } appendPQExpBufferStr(&buf, "\nFROM pg_catalog.pg_attribute a"); - appendPQExpBuffer(&buf, "\nWHERE a.attrelid = '%s' AND a.attnum > 0 AND NOT a.attisdropped", oid); + + /* + * FIXME: partid column should be avoided only for global indexes. And + * shall we restrict usage of partid column. + */ + appendPQExpBuffer(&buf, "\nWHERE a.attrelid = '%s' AND a.attnum > 0 AND NOT a.attisdropped AND a.attname != 'partid'", oid); appendPQExpBufferStr(&buf, "\nORDER BY a.attnum;"); res = PSQLexec(buf.data); @@ -2052,6 +2057,14 @@ describeOneTableDetails(const char *schemaname, printfPQExpBuffer(&title, _("Partitioned table \"%s.%s\""), schemaname, relationname); break; + case RELKIND_GLOBAL_INDEX: + if (tableinfo.relpersistence == 'u') + printfPQExpBuffer(&title, _("Unlogged global index \"%s.%s\""), + schemaname, relationname); + else + printfPQExpBuffer(&title, _("Global index \"%s.%s\""), + schemaname, relationname); + break; default: /* untranslated unknown relkind */ printfPQExpBuffer(&title, "?%c? 
\"%s.%s\"", diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index e709d2e0af..cf7ddb0131 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -21,6 +21,7 @@ #include "access/xlogreader.h" #include "catalog/pg_am_d.h" #include "catalog/pg_index.h" +#include "common/int.h" #include "lib/stringinfo.h" #include "storage/bufmgr.h" #include "storage/shm_toc.h" @@ -655,6 +656,47 @@ BTreeTupleGetHeapTID(IndexTuple itup) return &itup->t_tid; } +/* + * Fetch partition ID store in the index tuple. + * + * For global indexes we store partition ID column as a last additional key + * column in order to identify which partition this index tuple belongs to. + */ +static inline PartitionId +BTreeTupleGetPartitionId(Relation index, IndexTuple itup) +{ + bool is_null; + Datum datum; + int partidattno = IndexRelationGetNumberOfKeyAttributes(index); + TupleDesc tupleDesc = RelationGetDescr(index); + + Assert(RelationIsGlobalIndex(index)); + + /* + * If this is a pivot tuple and tiebreaker partition id attribute is not + * present in it then return InvalidPartitionId. + */ + if (BTreeTupleIsPivot(itup) && BTreeTupleGetNAtts(itup, index) <= + IndexRelationGetNumberOfKeyAttributes(index)) + return InvalidPartitionId; + + /* Fetch partition id attribute from index tuple. */ + datum = index_getattr(itup, partidattno, tupleDesc, &is_null); + Assert(!is_null); + + return DatumGetPartitionId(datum); +} + +/* + * Get relation OID with respect to the partition ID stored in the IndexTuple. + */ +static inline Oid +BTreeTupleGetPartitionRelid(Relation index, IndexTuple itup) +{ + return IndexGetPartitionReloid(index, + BTreeTupleGetPartitionId(index, itup)); +} + /* * Get maximum heap TID attribute, which could be the only TID in the case of * a non-pivot tuple that does not have a posting list. 
@@ -676,6 +718,19 @@ BTreeTupleGetMaxHeapTID(IndexTuple itup) return &itup->t_tid; } +/* + * _bt_indexdel_cmp() -- qsort comparison function for _bt_simpledel_pass() in + * order to sort the items in partition ID order. + */ +static inline int +_bt_indexdel_cmp(const void *arg1, const void *arg2) +{ + PartidDeltidMapping *b1 = ((PartidDeltidMapping *) arg1); + PartidDeltidMapping *b2 = ((PartidDeltidMapping *) arg2); + + return pg_cmp_u32(b1->partid, b2->partid); +} + /* * Operator strategy numbers for B-tree have been moved to access/stratnum.h, * because many places need to use them in ScanKeyInit() calls. @@ -1156,7 +1211,8 @@ typedef struct BTOptions } BTOptions; #define BTGetFillFactor(relation) \ - (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \ + (AssertMacro((relation->rd_rel->relkind == RELKIND_INDEX || \ + relation->rd_rel->relkind == RELKIND_GLOBAL_INDEX) && \ relation->rd_rel->relam == BTREE_AM_OID), \ (relation)->rd_options ? \ ((BTOptions *) (relation)->rd_options)->fillfactor : \ @@ -1164,7 +1220,8 @@ typedef struct BTOptions #define BTGetTargetPageFreeSpace(relation) \ (BLCKSZ * (100 - BTGetFillFactor(relation)) / 100) #define BTGetDeduplicateItems(relation) \ - (AssertMacro(relation->rd_rel->relkind == RELKIND_INDEX && \ + (AssertMacro((relation->rd_rel->relkind == RELKIND_INDEX || \ + relation->rd_rel->relkind == RELKIND_GLOBAL_INDEX) && \ relation->rd_rel->relam == BTREE_AM_OID), \ ((relation)->rd_options ? 
\ ((BTOptions *) (relation)->rd_options)->deduplicate_items : true)) @@ -1287,7 +1344,8 @@ extern void _bt_delitems_vacuum(Relation rel, Buffer buf, BTVacuumPosting *updatable, int nupdatable); extern void _bt_delitems_delete_check(Relation rel, Buffer buf, Relation heapRel, - TM_IndexDeleteOp *delstate); + TM_IndexDeleteOp *delstate, + PartidDeltidMapping *mapping); extern void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate); extern void _bt_pendingfsm_init(Relation rel, BTVacState *vstate, bool cleanuponly); diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 1c9e802a6b..cea236b340 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -17,6 +17,7 @@ #ifndef TABLEAM_H #define TABLEAM_H +#include "c.h" #include "access/relscan.h" #include "access/sdir.h" #include "access/xact.h" @@ -200,6 +201,11 @@ typedef struct TM_FailureData * ndeltids is 0 on return from call to tableam, in which case no index tuple * deletions are possible. Simple deletion callers can rely on any entries * they know to be deletable appearing in the final array as deletable. + * + * Note: For global indexes, the TID alone is insufficient to identify the + * heap tuple. We also need the partition ID that indicates which partition the + * TID belongs to. Later, when accessing the heap, the partition ID can be + * converted to the corresponding relation ID. */ typedef struct TM_IndexDelete { @@ -248,6 +254,20 @@ typedef struct TM_IndexDeleteOp TM_IndexStatus *status; } TM_IndexDeleteOp; +/* + * This maintain a entry with respect to each entry of *deltids in + * TM_IndexDeleteOp structure. For each entry it will keep the partition ID + * for that tid and the index into the *deltids array. We need this so that + * later we can sort deleted tids in partittion ID order in order to call the + * table AM method for checking the deleted tids status. 
+ */ +typedef struct PartidDeltidMapping +{ + PartitionId partid; /* Partition ID of the entry in deltids array + in TM_IndexDeleteOp. */ + int idx; /* Index in deltids array in TM_IndexDeleteOp */ +} PartidDeltidMapping; + /* "options" flag bits for table_tuple_insert */ /* TABLE_INSERT_SKIP_WAL was 0x0001; RelationNeedsWAL() now governs */ #define TABLE_INSERT_SKIP_FSM 0x0002 diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 4daa8bef5e..da11c32179 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -64,7 +64,8 @@ extern void index_check_primary_key(Relation heapRel, #define INDEX_CREATE_CONCURRENT (1 << 3) #define INDEX_CREATE_IF_NOT_EXISTS (1 << 4) #define INDEX_CREATE_PARTITIONED (1 << 5) -#define INDEX_CREATE_INVALID (1 << 6) +#define INDEX_CREATE_GLOBAL (1 << 6) +#define INDEX_CREATE_INVALID (1 << 7) extern Oid index_create(Relation heapRelation, const char *indexRelationName, @@ -86,7 +87,8 @@ extern Oid index_create(Relation heapRelation, bits16 constr_flags, bool allow_system_table_mods, bool is_internal, - Oid *constraintId); + Oid *constraintId, + List *inheritors); #define INDEX_CONSTR_CREATE_MARK_AS_PRIMARY (1 << 0) #define INDEX_CONSTR_CREATE_DEFERRABLE (1 << 1) @@ -144,7 +146,10 @@ extern void index_build(Relation heapRelation, IndexInfo *indexInfo, bool isreindex, bool parallel); - +extern void index_update_stats(Relation rel, + bool hasindex, + bool hasglobalindex, + double reltuples); extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot); extern void index_set_state_flags(Oid indexId, IndexStateFlagsAction action); @@ -153,7 +158,7 @@ extern Oid IndexGetRelation(Oid indexId, bool missing_ok); extern void reindex_index(const ReindexStmt *stmt, Oid indexId, bool skip_constraint_checks, char persistence, - const ReindexParams *params); + const ReindexParams *params, Relation heapRelation); /* Flag bits for reindex_relation(): */ #define REINDEX_REL_PROCESS_TOAST 0x01 diff --git 
a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 07d182da79..1115963360 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -77,6 +77,12 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat /* T if has (or has had) any indexes */ bool relhasindex BKI_DEFAULT(f); + /* + * T if this rel or any of its ancestor has (or has had) any global + * indexes. + */ + bool relhasglobalindex BKI_DEFAULT(f); + /* T if shared across databases */ bool relisshared BKI_DEFAULT(f); @@ -174,6 +180,7 @@ MAKE_SYSCACHE(RELNAMENSP, pg_class_relname_nsp_index, 128); #define RELKIND_FOREIGN_TABLE 'f' /* foreign table */ #define RELKIND_PARTITIONED_TABLE 'p' /* partitioned table */ #define RELKIND_PARTITIONED_INDEX 'I' /* partitioned index */ +#define RELKIND_GLOBAL_INDEX 'g' /* global index */ #define RELPERSISTENCE_PERMANENT 'p' /* regular table */ #define RELPERSISTENCE_UNLOGGED 'u' /* unlogged permanent table */ @@ -202,7 +209,8 @@ MAKE_SYSCACHE(RELNAMENSP, pg_class_relname_nsp_index, 128); (relkind) == RELKIND_INDEX || \ (relkind) == RELKIND_SEQUENCE || \ (relkind) == RELKIND_TOASTVALUE || \ - (relkind) == RELKIND_MATVIEW) + (relkind) == RELKIND_MATVIEW || \ + (relkind) == RELKIND_GLOBAL_INDEX) #define RELKIND_HAS_PARTITIONS(relkind) \ ((relkind) == RELKIND_PARTITIONED_TABLE || \ diff --git a/src/include/catalog/pg_index_partitions.h b/src/include/catalog/pg_index_partitions.h index 2dcc8ca3fc..c2d952ef9b 100644 --- a/src/include/catalog/pg_index_partitions.h +++ b/src/include/catalog/pg_index_partitions.h @@ -45,6 +45,7 @@ CATALOG(pg_index_partitions,6015,IndexPartitionsRelationId) typedef FormData_pg_index_partitions *Form_pg_index_partitions; DECLARE_UNIQUE_INDEX_PKEY(pg_index_partitions_indexoid_partid_index, 6018, IndexPartitionsIndexId, pg_index_partitions, btree(indexoid oid_ops, partid int4_ops)); +DECLARE_INDEX(pg_index_partitions_reloid_index, 6019, IndexPartitionsReloidIndexId, 
pg_index_partitions, btree(reloid oid_ops)); /* * Map over the pg_index_partitions table for a particular global index. This @@ -74,6 +75,28 @@ typedef struct IndexPartitionInfoEntry #define FirstValidPartitionId 1 #define PartIdIsValid(partid) ((bool) ((partid) != InvalidPartitionId)) +/* + * The "partitionid" is a special purpose attribute this attribute is not have + * entry in the pg_attribute table. But this is just used for getting the + * FormData_pg_attribute entry for partition id attribute. + * + * TODO: We need to find some better way than doing this. + */ +#define PartitionIdAttributeNumber (-100) + +static const FormData_pg_attribute partitionid_attr = { + .attname = {""}, + .atttypid = INT4OID, + .attlen = sizeof(int32), + .attnum = PartitionIdAttributeNumber, + .atttypmod = -1, + .attbyval = true, + .attalign = TYPALIGN_INT, + .attstorage = TYPSTORAGE_PLAIN, + .attnotnull = true, + .attislocal = true, +}; + extern void BuildIndexPartitionInfo(Relation relation, MemoryContext context); extern PartitionId IndexGetRelationPartitionId(Relation irel, Oid reloid); extern Oid IndexGetPartitionReloid(Relation irel, PartitionId partid); @@ -81,4 +104,5 @@ extern PartitionId IndexGetNextPartitionID(Relation irel); extern void DeleteIndexPartitionEntries(Oid indrelid); extern void InsertIndexPartitionEntry(Relation irel, Oid reloid, PartitionId partid); extern void InvalidateIndexPartitionEntries(List *reloids, Oid indexoid); +extern List *IndexPartitionRelidGetGlobalIndexOids(Oid reloid); #endif /* PG_INDEX_PARTITIONS_H */ diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index dd22b5efdf..158ccc1773 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -30,6 +30,7 @@ extern ObjectAddress DefineIndex(Oid tableId, Oid indexRelationId, Oid parentIndexId, Oid parentConstraintId, + List *inheritors, int total_parts, bool is_alter_table, bool check_rights, diff --git a/src/include/commands/tablecmds.h 
b/src/include/commands/tablecmds.h index 6832470d38..956f617fbd 100644 --- a/src/include/commands/tablecmds.h +++ b/src/include/commands/tablecmds.h @@ -106,5 +106,6 @@ extern void RangeVarCallbackOwnsRelation(const RangeVar *relation, Oid relId, Oid oldRelId, void *arg); extern bool PartConstraintImpliedByRelConstraint(Relation scanrel, List *partConstraint); - +extern void AttachParittionsToGlobalIndex(Relation irel, List *reloids); +extern void DetachFromGlobalIndexes(List *indexoids, List *reloids); #endif /* TABLECMDS_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 2492282213..b7cec7b6fc 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -187,6 +187,15 @@ typedef struct ExprState * * ii_Concurrent, ii_BrokenHotChain, and ii_ParallelWorkers are used only * during index build; they're conventionally zeroed otherwise. + * + * ii_partid is only used during inserting a index tuple or index build. This + * holds the partition Id of a leaf partition for which we are currently + * inserting the tuple into the global index. + * + * XXX this is stored by caller where we have the information about currently + * for which partition we are inserting a tuple into the index and this is + * accessed by FormIndexDatum(). We may consider to pass this as a parameter + * to FormIndexDatum() or some other way of computing this. * ---------------- */ typedef struct IndexInfo @@ -216,6 +225,7 @@ typedef struct IndexInfo bool ii_WithoutOverlaps; int ii_ParallelWorkers; Oid ii_Am; + PartitionId ii_partid; void *ii_AmCache; MemoryContext ii_Context; } IndexInfo; diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index ba12678d1c..425d6b1386 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -3466,6 +3466,7 @@ typedef struct IndexStmt bool unique; /* is index unique? 
*/ bool nulls_not_distinct; /* null treatment for UNIQUE constraints */ bool primary; /* is index a primary key? */ + bool global; /* is index a global index? */ bool isconstraint; /* is it for a pkey/unique constraint? */ bool iswithoutoverlaps; /* is the constraint WITHOUT OVERLAPS? */ bool deferrable; /* is the constraint DEFERRABLE? */ diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index fa7c7e0323..8d2804bdcf 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -147,6 +147,7 @@ extern bool get_rel_relispartition(Oid relid); extern Oid get_rel_tablespace(Oid relid); extern char get_rel_persistence(Oid relid); extern Oid get_rel_relam(Oid relid); +extern bool get_rel_has_globalindex(Oid relid); extern Oid get_transform_fromsql(Oid typid, Oid langid, List *trftypes); extern Oid get_transform_tosql(Oid typid, Oid langid, List *trftypes); extern bool get_typisdefined(Oid typid); diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 35270fdc05..1117f352e4 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -150,7 +150,16 @@ typedef struct RelationData MemoryContext rd_partcheckcxt; /* private cxt for rd_partcheck, if any */ /* data managed by RelationGetIndexList: */ - List *rd_indexlist; /* list of OIDs of indexes on relation */ + + /* + * List of OIDs of indexes on the relation, including the global indexes of + * all its ancestors. We include the ancestors' global indexes because any + * operation performed on this relation, such as insert or update, will + * also affect the global indexes of the ancestors, so wherever we need to + * fetch the indexes we also need to fetch the global indexes of the + * ancestors. + */ + List *rd_indexlist; Oid rd_pkindex; /* OID of (deferrable?) primary key, if any */ bool rd_ispkdeferrable; /* is rd_pkindex a deferrable PK? 
*/ Oid rd_replidindex; /* OID of replica identity index, if any */ @@ -722,6 +731,12 @@ RelationCloseSmgr(Relation relation) (relation)->rd_rel->relkind != RELKIND_FOREIGN_TABLE && \ !IsCatalogRelation(relation)) +/* + * True iff the relation is a global index (relkind RELKIND_GLOBAL_INDEX). + */ +#define RelationIsGlobalIndex(relation) \ + ((relation)->rd_rel->relkind == RELKIND_GLOBAL_INDEX) + /* routines in utils/cache/relcache.c */ extern void RelationIncrementReferenceCount(Relation rel); extern void RelationDecrementReferenceCount(Relation rel); -- 2.49.0