From 8be88eaebd1beeb98ebaa49b3053009c6a0c6ed0 Mon Sep 17 00:00:00 2001 From: David Zhang Date: Thu, 17 Nov 2022 12:26:25 -0800 Subject: [PATCH 4/4] support global unique index insert and update --- src/backend/access/nbtree/nbtinsert.c | 30 +++++- src/backend/access/nbtree/nbtree.c | 123 ++++++++++++++++++++++++- src/include/access/nbtree.h | 5 + src/test/regress/expected/indexing.out | 41 +++++++++ src/test/regress/sql/indexing.sql | 20 ++++ 5 files changed, 213 insertions(+), 6 deletions(-) diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index f6f4af8bfe..ad95726ea9 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -34,7 +34,7 @@ static BTStack _bt_search_insert(Relation rel, BTInsertState insertstate); static TransactionId _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, IndexUniqueCheck checkUnique, bool *is_unique, - uint32 *speculativeToken); + uint32 *speculativeToken, Relation origHeapRel); static OffsetNumber _bt_findinsertloc(Relation rel, BTInsertState insertstate, bool checkingunique, @@ -73,6 +73,11 @@ static BlockNumber *_bt_deadblocks(Page page, OffsetNumber *deletable, int *nblocks); static inline int _bt_blk_cmp(const void *arg1, const void *arg2); +TransactionId _bt_check_unique_gi(Relation rel, BTInsertState insertstate, + Relation heapRel, + IndexUniqueCheck checkUnique, bool *is_unique, + uint32 *speculativeToken, Relation origHeapRel); + /* * _bt_doinsert() -- Handle insertion of a single index tuple in the tree. 
* @@ -206,7 +211,7 @@ search: uint32 speculativeToken; xwait = _bt_check_unique(rel, &insertstate, heapRel, checkUnique, - &is_unique, &speculativeToken); + &is_unique, &speculativeToken, NULL); if (unlikely(TransactionIdIsValid(xwait))) { @@ -379,6 +384,15 @@ _bt_search_insert(Relation rel, BTInsertState insertstate) NULL); } +TransactionId +_bt_check_unique_gi(Relation rel, BTInsertState insertstate, Relation heapRel, + IndexUniqueCheck checkUnique, bool *is_unique, + uint32 *speculativeToken, Relation origHeapRel) +{ + return _bt_check_unique(rel, insertstate, heapRel, checkUnique, + is_unique, speculativeToken, origHeapRel); +} + /* * _bt_check_unique() -- Check for violation of unique index constraint * @@ -405,7 +419,7 @@ _bt_search_insert(Relation rel, BTInsertState insertstate) static TransactionId _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, IndexUniqueCheck checkUnique, bool *is_unique, - uint32 *speculativeToken) + uint32 *speculativeToken, Relation origHeapRel) { IndexTuple itup = insertstate->itup; IndexTuple curitup = NULL; @@ -560,6 +574,7 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, &all_dead)) { TransactionId xwait; + bool idx_fetch_result; /* * It is a duplicate. If we are only doing a partial @@ -613,8 +628,13 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel, * entry. 
*/ htid = itup->t_tid; - if (table_index_fetch_tuple_check(heapRel, &htid, - SnapshotSelf, NULL)) + if (origHeapRel) + idx_fetch_result = table_index_fetch_tuple_check(origHeapRel, &htid, + SnapshotSelf, NULL); + else + idx_fetch_result = table_index_fetch_tuple_check(heapRel, &htid, + SnapshotSelf, NULL); + if (idx_fetch_result) { /* Normal case --- it's still live */ } diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index b52eca8f38..84dc58ba38 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -23,6 +23,8 @@ #include "access/relscan.h" #include "access/xlog.h" #include "access/xloginsert.h" +#include "access/table.h" +#include "catalog/partition.h" #include "commands/progress.h" #include "commands/vacuum.h" #include "miscadmin.h" @@ -34,9 +36,11 @@ #include "storage/ipc.h" #include "storage/lmgr.h" #include "storage/smgr.h" +#include "storage/predicate.h" #include "utils/builtins.h" #include "utils/index_selfuncs.h" #include "utils/memutils.h" +#include "partitioning/partdesc.h" /* @@ -86,7 +90,9 @@ static BTVacuumPosting btreevacuumposting(BTVacState *vstate, IndexTuple posting, OffsetNumber updatedoffset, int *nremaining); - +static void + btinsert_check_unique_gi(IndexTuple itup, Relation idxRel, + Relation heapRel, IndexUniqueCheck checkUnique); /* * Btree handler function: return IndexAmRoutine with access method parameters @@ -177,6 +183,118 @@ btbuildempty(Relation index) smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM); } +/* + * btinsert_check_unique_gi() -- cross-partition uniqueness check. + * + * For every other partition of the parent table, probe its global indexes for a duplicate of itup. 
+ */ +static void +btinsert_check_unique_gi(IndexTuple itup, Relation idxRel, + Relation heapRel, IndexUniqueCheck checkUnique) +{ + bool is_unique = false; + BTScanInsert itup_key = _bt_mkscankey(idxRel, itup); + + if (!itup_key->anynullkeys && + idxRel->rd_rel->relkind == RELKIND_GLOBAL_INDEX) + { + Oid parentId; + Relation parentTbl; + PartitionDesc partDesc; + int i; + int nparts; + Oid *partOids; + + itup_key->scantid = NULL; + parentId = heapRel->rd_rel->relispartition ? + get_partition_parent(idxRel->rd_index->indrelid, false) : InvalidOid; + parentTbl = table_open(parentId, AccessShareLock); + partDesc = RelationGetPartitionDesc(parentTbl, true); + nparts = partDesc->nparts; + partOids = palloc(sizeof(Oid) * nparts); + memcpy(partOids, partDesc->oids, sizeof(Oid) * nparts); + for (i = 0; i < nparts; i++) + { + Oid childRelid = partOids[i]; + List *childidxs; + ListCell *cell; + + if (childRelid != heapRel->rd_rel->oid) + { + Relation hRel = table_open(childRelid, AccessShareLock); + + childidxs = RelationGetIndexList(hRel); + foreach(cell, childidxs) + { + Oid cldidxid = lfirst_oid(cell); + Relation iRel = index_open(cldidxid, AccessShareLock); + + if (iRel->rd_rel->relkind == RELKIND_GLOBAL_INDEX + && iRel->rd_rel->oid != idxRel->rd_rel->oid) + { + BTStack stack; + uint32 speculativeToken; + BTInsertStateData insertstate; + TransactionId xwait = InvalidTransactionId; + + insertstate.itup = itup; + insertstate.itemsz = MAXALIGN(IndexTupleSize(itup)); + insertstate.itup_key = itup_key; + insertstate.bounds_valid = false; + insertstate.buf = InvalidBuffer; + insertstate.postingoff = 0; + + search_global: + stack = _bt_search(iRel, insertstate.itup_key, + &insertstate.buf, BT_READ, NULL); + xwait = _bt_check_unique_gi(iRel, &insertstate, + hRel, checkUnique, &is_unique, + &speculativeToken, heapRel); + if (unlikely(TransactionIdIsValid(xwait))) + { + /* Have to wait for the other guy ... 
*/ + if (BufferIsValid(insertstate.buf)) + { + _bt_relbuf(iRel, insertstate.buf); + insertstate.buf = InvalidBuffer; + } + + /* + * If it's a speculative insertion, wait for it to + * finish (ie. to go ahead with the insertion, or + * kill the tuple). Otherwise wait for the + * transaction to finish as usual. + */ + if (speculativeToken) + SpeculativeInsertionWait(xwait, speculativeToken); + else + XactLockTableWait(xwait, iRel, &itup->t_tid, XLTW_InsertIndex); + + /* start over... */ + if (stack) + _bt_freestack(stack); + goto search_global; + } + if (BufferIsValid(insertstate.buf)) + _bt_relbuf(iRel, insertstate.buf); + if (stack) + _bt_freestack(stack); + } + index_close(iRel, AccessShareLock); + } + if (childidxs) + list_free(childidxs); + table_close(hRel, AccessShareLock); + } + } + if (partOids) + pfree(partOids); + table_close(parentTbl, AccessShareLock); + } + if (itup_key) + pfree(itup_key); +} + /* * btinsert() -- insert an index tuple into a btree. * @@ -199,6 +317,9 @@ btinsert(Relation rel, Datum *values, bool *isnull, result = _bt_doinsert(rel, itup, checkUnique, indexUnchanged, heapRel); + if (checkUnique != UNIQUE_CHECK_NO) + btinsert_check_unique_gi(itup, rel, heapRel, checkUnique); + pfree(itup); return result; diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 8e4f6864e5..19761a4c31 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -1284,4 +1284,9 @@ extern IndexBuildResult *btbuild(Relation heap, Relation index, struct IndexInfo *indexInfo); extern void _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc); +extern TransactionId _bt_check_unique_gi(Relation rel, BTInsertState insertstate, + Relation heapRel, + IndexUniqueCheck checkUnique, bool *is_unique, + uint32 *speculativeToken, Relation origHeapRel); + #endif /* NBTREE_H */ diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out index 58de14c037..f0102fabe6 100644 --- 
a/src/test/regress/expected/indexing.out +++ 
b/src/test/regress/expected/indexing.out @@ -1459,6 +1459,47 @@ unique, btree, for table "public.gidxpart" Partitions: gidxpart1_b_idx, gidxpart2_b_idx +-- cross-partition uniqueness check for insert and update +insert into gidxpart values (1, 1, 'first'); +insert into gidxpart values (11, 11, 'eleventh'); +insert into gidxpart values (2, 11, 'duplicated (b)=(11) on other partition'); +ERROR: duplicate key value violates unique constraint "gidxpart2_b_idx" +DETAIL: Key (b)=(11) already exists. +insert into gidxpart values (12, 1, 'duplicated (b)=(1) on other partition'); +ERROR: duplicate key value violates unique constraint "gidxpart1_b_idx" +DETAIL: Key (b)=(1) already exists. +insert into gidxpart values (2, 120, 'second'); +insert into gidxpart values (12, 2, 'twelfth'); +update gidxpart set b=2 where a=2; +ERROR: duplicate key value violates unique constraint "gidxpart2_b_idx" +DETAIL: Key (b)=(2) already exists. +update gidxpart set b=1 where a=12; +ERROR: duplicate key value violates unique constraint "gidxpart1_b_idx" +DETAIL: Key (b)=(1) already exists. +update gidxpart set b=12 where a=12; +update gidxpart set b=2 where a=2; +select * from gidxpart; + a | b | c +----+----+---------- + 1 | 1 | first + 2 | 2 | second + 11 | 11 | eleventh + 12 | 12 | twelfth +(4 rows) + +-- cross-partition uniqueness check applies to newly created partition +create table gidxpart3 partition of gidxpart for values from (100) to (200); +select relname, relkind from pg_class where relname = 'gidxpart3_b_idx'; + relname | relkind +-----------------+--------- + gidxpart3_b_idx | g +(1 row) + +insert into gidxpart values (150, 11, 'duplicated (b)=(11) on other partition'); +ERROR: duplicate key value violates unique constraint "gidxpart2_b_idx" +DETAIL: Key (b)=(11) already exists. 
+insert into gidxpart values (150, 13, 'no duplicate b'); +-- clean up global index tests drop index gidx_u; drop table gidxpart; -- Test the cross-partition uniqueness with non-partition key with global unique index diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql index 78649bb5ca..42ee1ce19f 100644 --- a/src/test/regress/sql/indexing.sql +++ b/src/test/regress/sql/indexing.sql @@ -769,6 +769,26 @@ create unique index gidx_u on gidxpart using btree(b) global; select relname, relhasindex, relkind from pg_class where relname like '%gidx%' order by oid; \d+ gidxpart \d+ gidx_u +-- cross-partition uniqueness check for insert and update +insert into gidxpart values (1, 1, 'first'); +insert into gidxpart values (11, 11, 'eleventh'); +insert into gidxpart values (2, 11, 'duplicated (b)=(11) on other partition'); +insert into gidxpart values (12, 1, 'duplicated (b)=(1) on other partition'); +insert into gidxpart values (2, 120, 'second'); +insert into gidxpart values (12, 2, 'twelfth'); +update gidxpart set b=2 where a=2; +update gidxpart set b=1 where a=12; +update gidxpart set b=12 where a=12; +update gidxpart set b=2 where a=2; +select * from gidxpart; + +-- cross-partition uniqueness check applies to newly created partition +create table gidxpart3 partition of gidxpart for values from (100) to (200); +select relname, relkind from pg_class where relname = 'gidxpart3_b_idx'; +insert into gidxpart values (150, 11, 'duplicated (b)=(11) on other partition'); +insert into gidxpart values (150, 13, 'no duplicate b'); + +-- clean up global index tests drop index gidx_u; drop table gidxpart; -- 2.17.1