From 05c4e601b85be2fd79b642de2e1c194b5ad7ea80 Mon Sep 17 00:00:00 2001 From: Greg Burd Date: Sun, 26 Oct 2025 10:49:25 -0400 Subject: [PATCH v19 4/4] Enable HOT updates for expression and partial indexes Currently, PostgreSQL conservatively prevents HOT (Heap-Only Tuple) updates whenever any indexed column changes, even if the indexed portion of that column remains identical. This is overly restrictive for expression indexes (where f(column) might not change even when column changes) and partial indexes (where both old and new tuples might fall outside the predicate). This patch introduces several improvements to enable HOT updates in these cases: Add amcomparedatums callback to IndexAmRoutine. This allows index access methods like GIN to provide custom logic for comparing datums by extracting and comparing index keys rather than comparing the raw datums. GIN indexes now implement gincomparedatums() which extracts keys from both datums and compares the resulting key sets. Add ExecWhichIndexesRequireUpdates() to refine the set of modified attributes and determine precisely which indexes need updating. For partial indexes, this checks whether both old and new tuples satisfy or fail the predicate. For expression indexes, this uses type-specific equality operators to compare computed values. For extraction-based indexes (GIN/RUM), this delegates to amcomparedatums. Modify heap update paths to use the refined modified indexed attrs bitmapset returned by ExecWhichIndexesRequireUpdates(). This allows HOT updates when indexes don't actually require updating, while still preventing HOT updates when they do. Importantly, table access methods can still signal using TU_Update if all, none, or only summarizing indexes should be updated. 
While the executor layer now owns determining what has changed due to an update and is interested in only updating the minimum number of indexes possible, the table AM can override that while performing table_tuple_update(), which is what heap does. This optimization significantly improves update performance for tables with expression indexes, partial indexes, and GIN/GiST indexes on complex data types like JSONB and tsvector, while maintaining correct index semantics. Minimal additional overhead due to type-specific equality checking should be washed out by the benefits of updating indexes fewer times. --- src/backend/access/gin/ginutil.c | 94 ++- src/backend/access/heap/heapam.c | 10 +- src/backend/access/heap/heapam_handler.c | 6 +- src/backend/access/nbtree/nbtree.c | 38 ++ src/backend/access/table/tableam.c | 4 +- src/backend/bootstrap/bootstrap.c | 8 + src/backend/catalog/index.c | 57 ++ src/backend/catalog/indexing.c | 16 +- src/backend/catalog/toasting.c | 4 + src/backend/executor/execIndexing.c | 326 ++++++++- src/backend/executor/nodeModifyTable.c | 61 +- src/backend/nodes/makefuncs.c | 4 + src/include/access/amapi.h | 28 + src/include/access/gin.h | 3 + src/include/access/heapam.h | 6 +- src/include/access/nbtree.h | 4 + src/include/access/tableam.h | 8 +- src/include/catalog/index.h | 1 + src/include/executor/executor.h | 5 + src/include/nodes/execnodes.h | 19 + .../expected/hot_expression_indexes.out | 644 ++++++++++++++++++ src/test/regress/parallel_schedule | 6 + .../regress/sql/hot_expression_indexes.sql | 491 +++++++++++++ src/tools/pgindent/typedefs.list | 1 + 24 files changed, 1794 insertions(+), 50 deletions(-) create mode 100644 src/test/regress/expected/hot_expression_indexes.out create mode 100644 src/test/regress/sql/hot_expression_indexes.sql diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 78f7b7a2495..85e25ed73e8 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ 
-26,6 +26,7 @@ #include "storage/indexfsm.h" #include "utils/builtins.h" #include "utils/index_selfuncs.h" +#include "utils/memutils.h" #include "utils/rel.h" #include "utils/typcache.h" @@ -78,6 +79,7 @@ ginhandler(PG_FUNCTION_ARGS) amroutine->amproperty = NULL; amroutine->ambuildphasename = ginbuildphasename; amroutine->amvalidate = ginvalidate; + amroutine->amcomparedatums = gincomparedatums; amroutine->amadjustmembers = ginadjustmembers; amroutine->ambeginscan = ginbeginscan; amroutine->amrescan = ginrescan; @@ -477,13 +479,6 @@ cmpEntries(const void *a, const void *b, void *arg) return res; } - -/* - * Extract the index key values from an indexable item - * - * The resulting key values are sorted, and any duplicates are removed. - * This avoids generating redundant index entries. - */ Datum * ginExtractEntries(GinState *ginstate, OffsetNumber attnum, Datum value, bool isNull, @@ -729,3 +724,88 @@ ginbuildphasename(int64 phasenum) return NULL; } } + +/* + * gincomparedatums - Compare two datums to determine if they produce identical keys + * + * This function extracts keys from both old_datum and new_datum using the + * opclass's extractValue function, then compares the extracted key arrays. + * Returns true if the key sets are identical (same keys, same counts). + * + * This enables HOT updates for GIN indexes when the indexed portions of a + * value haven't changed, even if the value itself has changed. + * + * Example: JSONB column with GIN index. If an update changes a non-indexed + * key in the JSONB document, the extracted keys are identical and we can + * do a HOT update. 
+ */ +bool +gincomparedatums(Relation index, int attnum, + Datum old_datum, bool old_isnull, + Datum new_datum, bool new_isnull) +{ + GinState ginstate; + Datum *old_keys; + Datum *new_keys; + GinNullCategory *old_categories; + GinNullCategory *new_categories; + int32 old_nkeys; + int32 new_nkeys; + MemoryContext tmpcontext; + MemoryContext oldcontext; + bool result = true; + + /* Handle NULL cases */ + if (old_isnull != new_isnull) + return false; + if (old_isnull) + return true; + + /* Create temporary context for extraction work */ + tmpcontext = AllocSetContextCreate(CurrentMemoryContext, + "GIN datum comparison", + ALLOCSET_DEFAULT_SIZES); + oldcontext = MemoryContextSwitchTo(tmpcontext); + + initGinState(&ginstate, index); + + /* + * Extract keys from both datums using existing GIN infrastructure. + */ + old_keys = ginExtractEntries(&ginstate, attnum, old_datum, old_isnull, + &old_nkeys, &old_categories); + new_keys = ginExtractEntries(&ginstate, attnum, new_datum, new_isnull, + &new_nkeys, &new_categories); + + /* Different number of keys → definitely different */ + if (old_nkeys != new_nkeys) + { + result = false; + goto cleanup; + } + + /* + * Compare the sorted key arrays element-by-element. Since both arrays are + * already sorted by ginExtractEntries, we can do a simple O(n) + * comparison. 
+ */ + for (int i = 0; i < old_nkeys; i++) + { + int cmp = ginCompareEntries(&ginstate, attnum, + old_keys[i], old_categories[i], + new_keys[i], new_categories[i]); + + if (cmp != 0) + { + result = false; + break; + } + } + +cleanup: + /* Clean up */ + MemoryContextSwitchTo(oldcontext); + MemoryContextDelete(tmpcontext); + + return result; +} diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 1cdb72b3a7a..5b0ff13b13d 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -3268,7 +3268,7 @@ heap_update(Relation relation, HeapTupleData *oldtup, HeapTuple newtup, TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer, Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs, Bitmapset *sum_attrs, Bitmapset *pk_attrs, - Bitmapset *rid_attrs, Bitmapset *mix_attrs, Buffer *vmbuffer, + Bitmapset *rid_attrs, const Bitmapset *mix_attrs, Buffer *vmbuffer, bool rep_id_key_required, TU_UpdateIndexes *update_indexes) { TM_Result result; @@ -4337,8 +4337,9 @@ HeapDetermineColumnsInfo(Relation relation, * This routine may be used to update a tuple when concurrent updates of the * target tuple are not expected (for example, because we have a lock on the * relation associated with the tuple). Any failure is reported via ereport(). + * Returns the set of modified indexed attributes. 
*/ -void +Bitmapset * simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tuple, TU_UpdateIndexes *update_indexes) { @@ -4467,7 +4468,7 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup elog(ERROR, "tuple concurrently deleted"); - return; + return NULL; } /* @@ -4500,7 +4501,6 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup bms_free(sum_attrs); bms_free(pk_attrs); bms_free(rid_attrs); - bms_free(mix_attrs); bms_free(idx_attrs); switch (result) @@ -4526,6 +4526,8 @@ simple_heap_update(Relation relation, const ItemPointerData *otid, HeapTuple tup elog(ERROR, "unrecognized heap_update status: %u", result); break; } + + return mix_attrs; } diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index ef08e1d3e10..7527809ec08 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -319,7 +319,7 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, - Bitmapset *mix_attrs, + const Bitmapset *mix_attrs, TU_UpdateIndexes *update_indexes) { bool rep_id_key_required = false; @@ -407,10 +407,6 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, Assert(ItemIdIsNormal(lp)); - /* - * Partially construct the oldtup for HeapDetermineColumnsInfo to work and - * then pass that on to heap_update. 
- */ oldtup.t_tableOid = RelationGetRelid(relation); oldtup.t_data = (HeapTupleHeader) PageGetItem(page, lp); oldtup.t_len = ItemIdGetLength(lp); diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index fdff960c130..73cc3208757 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -155,6 +155,7 @@ bthandler(PG_FUNCTION_ARGS) amroutine->amproperty = btproperty; amroutine->ambuildphasename = btbuildphasename; amroutine->amvalidate = btvalidate; + amroutine->amcomparedatums = btcomparedatums; amroutine->amadjustmembers = btadjustmembers; amroutine->ambeginscan = btbeginscan; amroutine->amrescan = btrescan; @@ -1795,3 +1796,40 @@ bttranslatecmptype(CompareType cmptype, Oid opfamily) return InvalidStrategy; } } + +/* + * btcomparedatums - Compare two datums for equality + * + * This function is necessary because nbtree requires that keys that are not + * binary identical not be "equal". Other indexes might allow "A" and "a" to + * be "equal" when collation is case insensitive, but not nbtree. Why? Well, + * nbtree deduplicates TIDs on page split and the way it accomplishes that is by + * doing a binary comparison of the keys. + */ + +bool +btcomparedatums(Relation index, int attrnum, + Datum old_datum, bool old_isnull, + Datum new_datum, bool new_isnull) +{ + TupleDesc desc = RelationGetDescr(index); + CompactAttribute *att; + + /* + * If one value is NULL and other is not, then they are certainly not + * equal + */ + if (old_isnull != new_isnull) + return false; + + /* + * If both are NULL, they can be considered equal.
+ */ + if (old_isnull) + return true; + + /* We do simple binary comparison of the two datums */ + Assert(attrnum <= desc->natts); + att = TupleDescCompactAttr(desc, attrnum - 1); + return datumIsEqual(old_datum, new_datum, att->attbyval, att->attlen); +} diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index dadcf03ed24..ef7736bfa76 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -336,7 +336,7 @@ void simple_table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, Snapshot snapshot, - Bitmapset *modified_indexed_cols, + const Bitmapset *mix_attrs, TU_UpdateIndexes *update_indexes) { TM_Result result; @@ -348,7 +348,7 @@ simple_table_tuple_update(Relation rel, ItemPointer otid, snapshot, InvalidSnapshot, true /* wait for commit */ , &tmfd, &lockmode, - modified_indexed_cols, + mix_attrs, update_indexes); switch (result) diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index fc8638c1b61..329c110d0bf 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -961,10 +961,18 @@ index_register(Oid heap, newind->il_info->ii_Expressions = copyObject(indexInfo->ii_Expressions); newind->il_info->ii_ExpressionsState = NIL; + /* expression attrs will likely be null, but may as well copy it */ + newind->il_info->ii_ExpressionsAttrs = + copyObject(indexInfo->ii_ExpressionsAttrs); /* predicate will likely be null, but may as well copy it */ newind->il_info->ii_Predicate = copyObject(indexInfo->ii_Predicate); newind->il_info->ii_PredicateState = NULL; + /* predicate attrs will likely be null, but may as well copy it */ + newind->il_info->ii_PredicateAttrs = + copyObject(indexInfo->ii_PredicateAttrs); + newind->il_info->ii_CheckedPredicate = false; + newind->il_info->ii_PredicateSatisfied = false; /* no exclusion constraints at bootstrap time, so no need to copy */ Assert(indexInfo->ii_ExclusionOps == NULL); 
Assert(indexInfo->ii_ExclusionProcs == NULL); diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 5d9db167e59..29b8cc4badd 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -27,6 +27,7 @@ #include "access/heapam.h" #include "access/multixact.h" #include "access/relscan.h" +#include "access/sysattr.h" #include "access/tableam.h" #include "access/toast_compression.h" #include "access/transam.h" @@ -58,6 +59,7 @@ #include "commands/trigger.h" #include "executor/executor.h" #include "miscadmin.h" +#include "nodes/execnodes.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" @@ -2414,6 +2416,61 @@ index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode) * ---------------------------------------------------------------- */ +/* ---------------- + * BuildUpdateIndexInfo + * + * For expression indexes updates may not change the indexed value allowing + * for a HOT update. Add information to the IndexInfo to allow for checking + * if the indexed value has changed. + * + * Do this processing here rather than in BuildIndexInfo() to not incur the + * overhead in the common non-expression cases. + * ---------------- + */ +void +BuildUpdateIndexInfo(ResultRelInfo *resultRelInfo) +{ + for (int j = 0; j < resultRelInfo->ri_NumIndices; j++) + { + int i; + int indnkeyatts; + Bitmapset *attrs = NULL; + IndexInfo *ii = resultRelInfo->ri_IndexRelationInfo[j]; + + /* + * Expressions are not allowed on non-key attributes, so we can skip + * them as they should show up in the index HOT-blocking attributes. 
+ */ + indnkeyatts = ii->ii_NumIndexKeyAttrs; + + /* Collect key attributes used by the index */ + for (i = 0; i < indnkeyatts; i++) + { + AttrNumber attnum = ii->ii_IndexAttrNumbers[i]; + + if (attnum != 0) + attrs = bms_add_member(attrs, attnum - FirstLowInvalidHeapAttributeNumber); + } + + /* Collect attributes used in the expression */ + if (ii->ii_Expressions) + pull_varattnos((Node *) ii->ii_Expressions, + resultRelInfo->ri_RangeTableIndex, + &ii->ii_ExpressionsAttrs); + + /* Collect attributes used in the predicate */ + if (ii->ii_Predicate) + pull_varattnos((Node *) ii->ii_Predicate, + resultRelInfo->ri_RangeTableIndex, + &ii->ii_PredicateAttrs); + + ii->ii_IndexedAttrs = bms_union(attrs, ii->ii_ExpressionsAttrs); + + /* All indexes should index *something*! */ + Assert(!bms_is_empty(ii->ii_IndexedAttrs)); + } +} + /* ---------------- * BuildIndexInfo * Construct an IndexInfo record for an open index diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c index 004c5121000..a361c215490 100644 --- a/src/backend/catalog/indexing.c +++ b/src/backend/catalog/indexing.c @@ -102,7 +102,7 @@ CatalogIndexInsert(CatalogIndexState indstate, HeapTuple heapTuple, * Get information from the state structure. Fall out if nothing to do. 
*/ numIndexes = indstate->ri_NumIndices; - if (numIndexes == 0) + if (numIndexes == 0 || updateIndexes == TU_None) return; relationDescs = indstate->ri_IndexRelationDescs; indexInfoArray = indstate->ri_IndexRelationInfo; @@ -314,15 +314,18 @@ CatalogTupleUpdate(Relation heapRel, const ItemPointerData *otid, HeapTuple tup) { CatalogIndexState indstate; TU_UpdateIndexes updateIndexes = TU_All; + Bitmapset *updatedAttrs; CatalogTupleCheckConstraints(heapRel, tup); indstate = CatalogOpenIndexes(heapRel); - simple_heap_update(heapRel, otid, tup, &updateIndexes); - + updatedAttrs = simple_heap_update(heapRel, otid, tup, &updateIndexes); + ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = updatedAttrs; CatalogIndexInsert(indstate, tup, updateIndexes); + CatalogCloseIndexes(indstate); + bms_free(updatedAttrs); } /* @@ -338,12 +341,15 @@ CatalogTupleUpdateWithInfo(Relation heapRel, const ItemPointerData *otid, HeapTu CatalogIndexState indstate) { TU_UpdateIndexes updateIndexes = TU_All; + Bitmapset *updatedAttrs; CatalogTupleCheckConstraints(heapRel, tup); - simple_heap_update(heapRel, otid, tup, &updateIndexes); - + updatedAttrs = simple_heap_update(heapRel, otid, tup, &updateIndexes); + ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = updatedAttrs; CatalogIndexInsert(indstate, tup, updateIndexes); + ((ResultRelInfo *) indstate)->ri_ChangedIndexedCols = NULL; + bms_free(updatedAttrs); } /* diff --git a/src/backend/catalog/toasting.c b/src/backend/catalog/toasting.c index 5d819bda54a..c665aa744b3 100644 --- a/src/backend/catalog/toasting.c +++ b/src/backend/catalog/toasting.c @@ -292,8 +292,12 @@ create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, indexInfo->ii_IndexAttrNumbers[1] = 2; indexInfo->ii_Expressions = NIL; indexInfo->ii_ExpressionsState = NIL; + indexInfo->ii_ExpressionsAttrs = NULL; indexInfo->ii_Predicate = NIL; indexInfo->ii_PredicateState = NULL; + indexInfo->ii_PredicateAttrs = NULL; + indexInfo->ii_CheckedPredicate = false; + 
indexInfo->ii_PredicateSatisfied = false; indexInfo->ii_ExclusionOps = NULL; indexInfo->ii_ExclusionProcs = NULL; indexInfo->ii_ExclusionStrats = NULL; diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index fb1bc3a480d..736543147e7 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -109,11 +109,15 @@ #include "access/genam.h" #include "access/relscan.h" #include "access/tableam.h" +#include "access/sysattr.h" #include "access/xact.h" #include "catalog/index.h" #include "executor/executor.h" +#include "nodes/bitmapset.h" +#include "nodes/execnodes.h" #include "nodes/nodeFuncs.h" #include "storage/lmgr.h" +#include "utils/datum.h" #include "utils/multirangetypes.h" #include "utils/rangetypes.h" #include "utils/snapmgr.h" @@ -318,8 +322,8 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, Relation heapRelation; IndexInfo **indexInfoArray; ExprContext *econtext; - Datum values[INDEX_MAX_KEYS]; - bool isnull[INDEX_MAX_KEYS]; + Datum loc_values[INDEX_MAX_KEYS]; + bool loc_isnull[INDEX_MAX_KEYS]; Assert(ItemPointerIsValid(tupleid)); @@ -343,13 +347,13 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, /* Arrange for econtext's scan tuple to be the tuple under test */ econtext->ecxt_scantuple = slot; - /* - * for each index, form and insert the index tuple - */ + /* Insert into each index that needs updating */ for (i = 0; i < numIndices; i++) { Relation indexRelation = relationDescs[i]; IndexInfo *indexInfo; + Datum *values; + bool *isnull; bool applyNoDupErr; IndexUniqueCheck checkUnique; bool indexUnchanged; @@ -366,7 +370,7 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, /* * Skip processing of non-summarizing indexes if we only update - * summarizing indexes + * summarizing indexes or if this index is unchanged. 
*/ if (onlySummarizing && !indexInfo->ii_Summarizing) continue; @@ -387,8 +391,15 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, indexInfo->ii_PredicateState = predicate; } + /* Check the index predicate if we haven't done so earlier on */ + if (!indexInfo->ii_CheckedPredicate) + { + indexInfo->ii_PredicateSatisfied = ExecQual(predicate, econtext); + indexInfo->ii_CheckedPredicate = true; + } + /* Skip this index-update if the predicate isn't satisfied */ - if (!ExecQual(predicate, econtext)) + if (!indexInfo->ii_PredicateSatisfied) continue; } @@ -396,11 +407,10 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, * FormIndexDatum fills in its values and isnull parameters with the * appropriate values for the column(s) of the index. */ - FormIndexDatum(indexInfo, - slot, - estate, - values, - isnull); + FormIndexDatum(indexInfo, slot, estate, loc_values, loc_isnull); + + values = loc_values; + isnull = loc_isnull; /* Check whether to apply noDupErr to this index */ applyNoDupErr = noDupErr && @@ -435,7 +445,9 @@ ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, * index. If we're being called as part of an UPDATE statement, * consider if the 'indexUnchanged' = true hint should be passed. */ - indexUnchanged = update && bms_is_empty(resultRelInfo->ri_ChangedIndexedCols); + indexUnchanged = update && + !bms_overlap(indexInfo->ii_IndexedAttrs, + resultRelInfo->ri_ChangedIndexedCols); satisfiesConstraint = index_insert(indexRelation, /* index relation */ @@ -604,7 +616,12 @@ ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, checkedIndex = true; /* Check for partial index */ - if (indexInfo->ii_Predicate != NIL) + if (indexInfo->ii_CheckedPredicate && !indexInfo->ii_PredicateSatisfied) + { + /* We've already checked and the predicate wasn't satisfied. 
*/ + continue; + } + else if (indexInfo->ii_Predicate != NIL) { ExprState *predicate; @@ -1018,3 +1035,284 @@ ExecWithoutOverlapsNotEmpty(Relation rel, NameData attname, Datum attval, char t errmsg("empty WITHOUT OVERLAPS value found in column \"%s\" in relation \"%s\"", NameStr(attname), RelationGetRelationName(rel)))); } + +/* + * ExecWhichIndexesRequireUpdates + * + * Determine which indexes need updating given modified indexed attributes. + * This function is a companion to ExecCheckIndexedAttrsForChanges(). On the + * surface, they appear similar but they are doing two very different things. + * + * For a standard index on a set of attributes this is the intersection of + * the mix_attrs and the index attrs (key, expression, but not predicate). + * + * For expression indexes and indexes which implement the amcomparedatums() + * index AM API we'll need to form index datum and compare each attribute to + * see if any actually changed. + * + * For expression indexes the result of the expression might not change at all, + * this is common with JSONB columns which require expression indexes and where + * it is commonplace to index a field within a document and have updates that + * generally don't update that field. + * + * Partial indexes won't trigger index tuples when the old/new tuples are both + * outside of the predicate range. + * + * For nbtree the amcomparedatums() API is critical as it requires that key + * attributes are equal when they memcmp(), which might not be the case when + * using type-specific comparison or factoring in collation which might make + * an index case insensitive. + * + * All of this is to say that the goal is for the executor to know, ahead of + * calling into the table AM for the update and before calling into the index + * AM for inserting new index tuples, which attributes at a minimum will + * necessitate a new index tuple. + * + * The mix_attrs parameter contains attributes that: + * 1. 
Were referenced by the UPDATE statement and are known to have been modified + * 2. Are referenced by at least one index (expression, predicate, or otherwise) + * + * This function refines that set by determining which indexes actually + * need updates and only retaining the attributes that indicate that: + * - Partial index predicates: If both tuples fall outside, no update needed + * - Expression indexes: If expression results are identical, no update needed + * - Extraction indexes (GIN, RUM, etc): If extracted values are identical, no update needed + * + * Returns a refined Bitmapset of attributes that force index updates. + */ +Bitmapset * +ExecWhichIndexesRequireUpdates(ResultRelInfo *relinfo, + Bitmapset *mix_attrs, + EState *estate, + TupleTableSlot *old_tts, + TupleTableSlot *new_tts) +{ + Bitmapset *result_attrs = NULL; + ExprContext *econtext = GetPerTupleExprContext(estate); + TupleTableSlot *save_scantuple; + int i; + + if (relinfo->ri_NumIndices == 0 || bms_is_empty(mix_attrs)) + return NULL; + + /* Check each index */ + for (i = 0; i < relinfo->ri_NumIndices; i++) + { + Relation indexRel = relinfo->ri_IndexRelationDescs[i]; + IndexInfo *indexInfo = relinfo->ri_IndexRelationInfo[i]; + IndexAmRoutine *amroutine = indexRel->rd_indam; + bool has_expressions = (indexInfo->ii_Expressions != NIL); + bool has_am_compare = (amroutine->amcomparedatums != NULL); + bool is_partial = (indexInfo->ii_Predicate != NIL); + Bitmapset *idx_attrs = indexInfo->ii_IndexedAttrs; + Bitmapset *pre_attrs = indexInfo->ii_PredicateAttrs; + + /* Check partial index predicate iff attrs overlap with modified */ + if (is_partial && + !bms_is_empty((pre_attrs = bms_intersect(pre_attrs, mix_attrs)))) + { + ExprState *pstate; + bool old_qualifies, + new_qualifies; + + if (!indexInfo->ii_CheckedPredicate) + pstate = ExecPrepareQual(indexInfo->ii_Predicate, estate); + else + pstate = indexInfo->ii_PredicateState; + + save_scantuple = econtext->ecxt_scantuple; + + econtext->ecxt_scantuple
= old_tts; + old_qualifies = ExecQual(pstate, econtext); + + econtext->ecxt_scantuple = new_tts; + new_qualifies = ExecQual(pstate, econtext); + + econtext->ecxt_scantuple = save_scantuple; + + indexInfo->ii_CheckedPredicate = true; + indexInfo->ii_PredicateState = pstate; + indexInfo->ii_PredicateSatisfied = new_qualifies; + + /* Both outside predicate, index doesn't need update */ + if (!old_qualifies && !new_qualifies) + { + bms_free(pre_attrs); + continue; + } + + /* Predicate transition, must update index, add predicate attrs */ + if (old_qualifies != new_qualifies) + { + /* + * We can say for sure that the index needs update, so we can + * add in the attributes from the predicate that are also in + * the mix_attrs set, but we don't yet know if there are other + * attributes this index references that are modified (in the + * mix_attrs) and force index updates. The only way to know + * is to test them one by one. + */ + result_attrs = bms_add_members(result_attrs, pre_attrs); + bms_free(pre_attrs); + } + } + + /* + * If we've got a result set equal to our modified set then we've + * identified that all the attributes on the index need to trigger new + * index tuples. No need to keep checking, we're done not just with + * this index but in general, break out of the loop here. + */ + if (bms_equal(result_attrs, mix_attrs)) + break; + + /* + * NOTE: While it feels like we could avoid checking any further when + * the indexed attributes (key and expression) do not overlap with the + * modified indexed attributes (mix_attrs) we can't. Why? Well, it + * turns out that it is allowable for index AMs to have a different + * notion of equality. For instance, nbtree requires that datum + * equality be based on binary comparison, not anything type-specific.
+ * So, it is possible that the set of mix_attrs from + * ExecCheckIndexedAttrsForChanges() found that the new value of an + * attribute was equal to the old value despite it having a different + * binary representation. + * + * XXX: maybe that's something we should enforce and change nbtree? + */ + + /* + * Expression index or extraction-based index require us to form index + * datums/tuples and compare. We've done all we can to avoid this + * overhead now it's time to bite the bullet and get it done. + */ + if (has_expressions || has_am_compare) + { + Datum old_values[INDEX_MAX_KEYS]; + bool old_isnull[INDEX_MAX_KEYS]; + Datum new_values[INDEX_MAX_KEYS]; + bool new_isnull[INDEX_MAX_KEYS]; + TupleDesc indexdesc = RelationGetDescr(indexRel); + + save_scantuple = econtext->ecxt_scantuple; + + /* Evaluate expressions (if any) to get base datums */ + econtext->ecxt_scantuple = old_tts; + FormIndexDatum(indexInfo, old_tts, estate, + old_values, old_isnull); + + econtext->ecxt_scantuple = new_tts; + FormIndexDatum(indexInfo, new_tts, estate, + new_values, new_isnull); + + econtext->ecxt_scantuple = save_scantuple; + + /* Compare the index key datums */ + for (int j = 0; j < indexInfo->ii_NumIndexKeyAttrs; j++) + { + AttrNumber attrnum = indexInfo->ii_IndexAttrNumbers[j]; + bool values_equal; + + /* + * Skip attributes that we've already identified as triggering + * an index update. 
+ */ + if (attrnum > 0 && + bms_is_member(attrnum - FirstLowInvalidHeapAttributeNumber, result_attrs)) + continue; + + /* A change to/from NULL, record this attribute */ + if (old_isnull[j] != new_isnull[j]) + { + if (attrnum == 0) + result_attrs = bms_add_members(result_attrs, indexInfo->ii_ExpressionsAttrs); + else + result_attrs = bms_add_member(result_attrs, attrnum - FirstLowInvalidHeapAttributeNumber); + continue; + } + /* Both NULL, no change */ + if (old_isnull[j]) + continue; + + /* + * Use index AM's comparison function if present when + * comparing the datum formed when creating an index key. This + * is different from the comparison of datum in the tuple + * destined for a storage AM, which is why we only need to use + * this here. + */ + if (has_am_compare) + { + /* + * For nbtree to properly deduplicate TIDs on page split + * it must treat equality as binary comparison. So it is + * vital that we call its comparedatums() function. + * + * In the case of GIN/RUM indexes they too behave + * differently and can even extract one or more portions + * of the datum when forming index tuples. We'd like to + * know if this update needs to trigger one or more index + * tuples, so we let the index AM perform their extraction + * and compare the results. + * + * There may be other similar index AM implementations with + * extraction where indexes are built using only part(s) + * of the Datum and might even need to invoke + * type-specific equality operators. + * + * NOTE: For AM comparison, pass the 1-based index + * attribute number. The AM's compare function expects the + * same numbering as used internally by the AM. + */ + values_equal = amroutine->amcomparedatums(indexRel, j + 1, + old_values[j], old_isnull[j], + new_values[j], new_isnull[j]); + } + else + { + /* + * Expression index without custom AM comparison. Compare + * the expression results using type-specific equality via + * the TypeCache.
+ */ + Form_pg_attribute attr = TupleDescAttr(indexdesc, j); + + values_equal = tts_attr_equal(attr->atttypid, + attr->attcollation, + attr->attbyval, + attr->attlen, + old_values[j], + new_values[j]); + } + + if (!values_equal) + { + if (attrnum == 0) + result_attrs = bms_add_members(result_attrs, indexInfo->ii_ExpressionsAttrs); + else + result_attrs = bms_add_member(result_attrs, attrnum - FirstLowInvalidHeapAttributeNumber); + } + } + } + else + { + /* + * Here we know that we're reviewing an index that isn't an + * expression index and doesn't have an amcomparedatums() + * implementation (it may still be partial). Attributes that overlap + * with those known to have changed are the ones we need to + * record. + */ + + /* + * NOTE: we intentionally copy via intersect and then free rather + * than modify on the set we're pointing at in IndexInfo. + */ + idx_attrs = bms_intersect(idx_attrs, mix_attrs); + result_attrs = bms_add_members(result_attrs, idx_attrs); + bms_free(idx_attrs); + } + } + + return result_attrs; +} diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 4d1cf50e369..191748cdce8 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -54,10 +54,12 @@ #include "postgres.h" #include "access/htup_details.h" +#include "access/sysattr.h" #include "access/tableam.h" #include "access/tupconvert.h" #include "access/tupdesc.h" #include "access/xact.h" +#include "catalog/index.h" #include "commands/trigger.h" #include "executor/execPartition.h" #include "executor/executor.h" @@ -75,6 +77,7 @@ #include "utils/float.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "utils/relcache.h" #include "utils/snapmgr.h" @@ -2395,6 +2398,12 @@ ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, bool partition_constraint_failed; TM_Result result; + /* the set of modified indexed attributes */ + Bitmapset *mix_attrs = NULL; + + /* the set
of index attributes that trigger new index datum */ + Bitmapset *iru_attrs = NULL; + updateCxt->crossPartUpdate = false; /* @@ -2517,13 +2526,48 @@ lreplace: bms_free(resultRelInfo->ri_ChangedIndexedCols); resultRelInfo->ri_ChangedIndexedCols = NULL; - resultRelInfo->ri_ChangedIndexedCols = - ExecCheckIndexedAttrsForChanges(resultRelInfo, oldSlot, slot); + /* + * At this point we know the set of attributes specified in the UPDATE + * statement and those referenced by triggers, so we have a complete view + * of the UPDATE attributes on the table. We could get this set via the + * ExecGetUpdatedCols() function, but we'll need to review all indexed + * attributes because extensions could just directly heap_modify_tuple() + * an attribute not known to ExecGetUpdatedCols(). + * + * We want to know which, if any, attributes that are referenced by an + * index have changed value. This set of attributes will dictate the + * minimum number of indexes we need to update. + */ + mix_attrs = ExecCheckIndexedAttrsForChanges(resultRelInfo, oldSlot, slot); /* - * replace the heap tuple + * During updates we'll need a bit more information in IndexInfo but we've + * delayed adding it until here. We check to ensure that there are + * indexes, that something has changed that is indexed, and that the first + * index doesn't yet have ii_IndexedAttrs set as a way to ensure we only + * build this when needed and only once. We don't build this in + * ExecOpenIndices() as it is unnecessary overhead when not performing an + * update. + */ + if (resultRelInfo->ri_NumIndices > 0 && + bms_is_empty(resultRelInfo->ri_IndexRelationInfo[0]->ii_IndexedAttrs)) + BuildUpdateIndexInfo(resultRelInfo); + + /* + * The next step is to identify which indexes, at a minimum, require new + * index tuples. 
You might think that you could simply intersect the + * index key attributes with the modified attributes and be done, but then + * you'd have missed a few cases (expressions, partial, indexes with + * operators that index only a portion of a datum or many different + * portions of it). + */ + iru_attrs = ExecWhichIndexesRequireUpdates(resultRelInfo, mix_attrs, estate, + oldSlot, slot); + + /* + * Call into the table AM to update the heap tuple. * - * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that + * NOTE: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that * the row to be updated is visible to that snapshot, and throw a * can't-serialize error if not. This is a special-case behavior needed * for referential integrity updates in transaction-snapshot mode @@ -2535,9 +2579,14 @@ lreplace: estate->es_crosscheck_snapshot, true /* wait for commit */ , &context->tmfd, &updateCxt->lockmode, - resultRelInfo->ri_ChangedIndexedCols, + iru_attrs, &updateCxt->updateIndexes); + Assert(bms_is_empty(resultRelInfo->ri_ChangedIndexedCols)); + resultRelInfo->ri_ChangedIndexedCols = iru_attrs; + + bms_free(mix_attrs); + return result; } @@ -2555,7 +2604,7 @@ ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt, ModifyTableState *mtstate = context->mtstate; List *recheckIndexes = NIL; - /* insert index entries for tuple if necessary */ + /* Insert index entries for tuple if necessary */ if (resultRelInfo->ri_NumIndices > 0 && (updateCxt->updateIndexes != TU_None)) recheckIndexes = ExecInsertIndexTuples(resultRelInfo, slot, context->estate, diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index d69dc090aa4..e9a53b95caf 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -855,10 +855,14 @@ makeIndexInfo(int numattrs, int numkeyattrs, Oid amoid, List *expressions, /* expressions */ n->ii_Expressions = expressions; n->ii_ExpressionsState = NIL; + n->ii_ExpressionsAttrs = NULL; /* 
predicates */ n->ii_Predicate = predicates; n->ii_PredicateState = NULL; + n->ii_PredicateAttrs = NULL; + n->ii_CheckedPredicate = false; + n->ii_PredicateSatisfied = false; /* exclusion constraints */ n->ii_ExclusionOps = NULL; diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h index 63dd41c1f21..9bdf73eda59 100644 --- a/src/include/access/amapi.h +++ b/src/include/access/amapi.h @@ -211,6 +211,33 @@ typedef void (*ammarkpos_function) (IndexScanDesc scan); /* restore marked scan position */ typedef void (*amrestrpos_function) (IndexScanDesc scan); +/* + * amcomparedatums - Compare datums to determine if index update is needed + * + * This function compares old_datum and new_datum to determine if they would + * produce different index entries. For extraction-based indexes (GIN, RUM), + * this should: + * 1. Extract keys from old_datum using the opclass's extractValue function + * 2. Extract keys from new_datum using the opclass's extractValue function + * 3. Compare the two sets of keys using appropriate equality operators + * 4. Return true if the sets are equal (no index update needed) + * + * The comparison should account for: + * - Different numbers of extracted keys + * - NULL values + * - Type-specific equality (not just binary equality) + * - Opclass parameters (e.g., path in bson_rum_single_path_ops) + * + * For the DocumentDB example with path='a', this would extract values at + * path 'a' from both old and new BSON documents and compare them using + * BSON's equality operator. + */ +/* identify if updated datums would produce one or more index entries */ +typedef bool (*amcomparedatums_function) (Relation indexRelation, + int attno, + Datum old_datum, bool old_isnull, + Datum new_datum, bool new_isnull); + /* * Callback function signatures - for parallel index scans. 
*/ @@ -313,6 +340,7 @@ typedef struct IndexAmRoutine amendscan_function amendscan; ammarkpos_function ammarkpos; /* can be NULL */ amrestrpos_function amrestrpos; /* can be NULL */ + amcomparedatums_function amcomparedatums; /* can be NULL */ /* interface functions to support parallel index scans */ amestimateparallelscan_function amestimateparallelscan; /* can be NULL */ diff --git a/src/include/access/gin.h b/src/include/access/gin.h index 13ea91922ef..2f265f4816c 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -100,6 +100,9 @@ extern PGDLLIMPORT int gin_pending_list_limit; extern void ginGetStats(Relation index, GinStatsData *stats); extern void ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build); +extern bool gincomparedatums(Relation index, int attnum, + Datum old_datum, bool old_isnull, + Datum new_datum, bool new_isnull); extern void _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc); diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 41d541aa6b2..59db389a546 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -326,7 +326,7 @@ extern TM_Result heap_update(Relation relation, HeapTupleData *oldtup, TM_FailureData *tmfd, LockTupleMode *lockmode, Buffer buffer, Page page, BlockNumber block, ItemId lp, Bitmapset *hot_attrs, Bitmapset *sum_attrs, Bitmapset *pk_attrs, Bitmapset *rid_attrs, - Bitmapset *mix_attrs, Buffer *vmbuffer, + const Bitmapset *mix_attrs, Buffer *vmbuffer, bool rep_id_key_required, TU_UpdateIndexes *update_indexes); extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, @@ -361,8 +361,8 @@ extern bool heap_tuple_needs_eventual_freeze(HeapTupleHeader tuple); extern void simple_heap_insert(Relation relation, HeapTuple tup); extern void simple_heap_delete(Relation relation, const ItemPointerData *tid); -extern void simple_heap_update(Relation relation, const 
ItemPointerData *otid, - HeapTuple tup, TU_UpdateIndexes *update_indexes); +extern Bitmapset *simple_heap_update(Relation relation, const ItemPointerData *otid, + HeapTuple tup, TU_UpdateIndexes *update_indexes); extern TransactionId heap_index_delete_tuples(Relation rel, TM_IndexDeleteOp *delstate); diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 16be5c7a9c1..42bd329eaad 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -1210,6 +1210,10 @@ extern int btgettreeheight(Relation rel); extern CompareType bttranslatestrategy(StrategyNumber strategy, Oid opfamily); extern StrategyNumber bttranslatecmptype(CompareType cmptype, Oid opfamily); +extern bool btcomparedatums(Relation index, int attnum, + Datum old_datum, bool old_isnull, + Datum new_datum, bool new_isnull); + /* * prototypes for internal functions in nbtree.c diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 8a5931a3118..2b9206ff24a 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -549,7 +549,7 @@ typedef struct TableAmRoutine bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, - Bitmapset *updated_cols, + const Bitmapset *updated_cols, TU_UpdateIndexes *update_indexes); /* see table_tuple_lock() for reference about parameters */ @@ -1503,12 +1503,12 @@ static inline TM_Result table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool wait, TM_FailureData *tmfd, LockTupleMode *lockmode, - Bitmapset *updated_cols, TU_UpdateIndexes *update_indexes) + const Bitmapset *mix_cols, TU_UpdateIndexes *update_indexes) { return rel->rd_tableam->tuple_update(rel, otid, slot, cid, snapshot, crosscheck, wait, tmfd, lockmode, - updated_cols, update_indexes); + mix_cols, update_indexes); } /* @@ -2011,7 +2011,7 @@ extern void simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot); extern void 
simple_table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, Snapshot snapshot, - Bitmapset *modified_indexe_attrs, + const Bitmapset *mix_attrs, TU_UpdateIndexes *update_indexes); diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index dda95e54903..8d364f8b30f 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -132,6 +132,7 @@ extern bool CompareIndexInfo(const IndexInfo *info1, const IndexInfo *info2, const AttrMap *attmap); extern void BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii); +extern void BuildUpdateIndexInfo(ResultRelInfo *resultRelInfo); extern void FormIndexDatum(IndexInfo *indexInfo, TupleTableSlot *slot, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 993dc0e6ced..dda48f17605 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -739,6 +739,11 @@ extern Bitmapset *ExecGetAllUpdatedCols(ResultRelInfo *relinfo, EState *estate); */ extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative); extern void ExecCloseIndices(ResultRelInfo *resultRelInfo); +extern Bitmapset *ExecWhichIndexesRequireUpdates(ResultRelInfo *relinfo, + Bitmapset *mix_attrs, + EState *estate, + TupleTableSlot *old_tts, + TupleTableSlot *new_tts); extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, EState *estate, bool update, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 898368fb8cb..d8e88817206 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -174,15 +174,29 @@ typedef struct IndexInfo */ AttrNumber ii_IndexAttrNumbers[INDEX_MAX_KEYS]; + /* + * All key, expression, summarizing, and partition attributes referenced by + * this index + */ + Bitmapset *ii_IndexedAttrs; + /* expr trees for expression entries, or NIL if none */ List *ii_Expressions; /* list of Expr */ /* exec state for expressions, or NIL if none */ List 
*ii_ExpressionsState; /* list of ExprState */ + /* attributes exclusively referenced by expression indexes */ + Bitmapset *ii_ExpressionsAttrs; /* partial-index predicate, or NIL if none */ List *ii_Predicate; /* list of Expr */ /* exec state for expressions, or NIL if none */ ExprState *ii_PredicateState; + /* attributes referenced by the predicate */ + Bitmapset *ii_PredicateAttrs; + /* partial index predicate determined yet? */ + bool ii_CheckedPredicate; + /* amupdate hint used to avoid rechecking predicate */ + bool ii_PredicateSatisfied; /* Per-column exclusion operators, or NULL if none */ Oid *ii_ExclusionOps; /* array with one entry per column */ @@ -494,6 +508,11 @@ typedef struct ResultRelInfo Bitmapset *ri_extraUpdatedCols; /* true if the above has been computed */ bool ri_extraUpdatedCols_valid; + + /* + * For UPDATE a Bitmapset of the attributes that are both indexed and have + * changed in value. + */ Bitmapset *ri_ChangedIndexedCols; /* Projection to generate new tuple in an INSERT/UPDATE */ diff --git a/src/test/regress/expected/hot_expression_indexes.out b/src/test/regress/expected/hot_expression_indexes.out new file mode 100644 index 00000000000..d3eb07742c6 --- /dev/null +++ b/src/test/regress/expected/hot_expression_indexes.out @@ -0,0 +1,644 @@ +-- ================================================================ +-- Test Suite for HOT Updates with Expression and Partial Indexes +-- ================================================================ +-- Setup: Create function to measure HOT updates +CREATE OR REPLACE FUNCTION check_hot_updates( + expected INT, + p_table_name TEXT DEFAULT 't', + p_schema_name TEXT DEFAULT current_schema() +) +RETURNS TABLE ( + table_name TEXT, + total_updates BIGINT, + hot_updates BIGINT, + hot_update_percentage NUMERIC, + matches_expected BOOLEAN +) +LANGUAGE plpgsql +AS $$ +DECLARE + v_relid oid; + v_qualified_name TEXT; + v_hot_updates BIGINT; + v_updates BIGINT; + v_xact_hot_updates BIGINT; + v_xact_updates 
BIGINT; +BEGIN + -- Force statistics update + PERFORM pg_stat_force_next_flush(); + + -- Get table OID + v_qualified_name := quote_ident(p_schema_name) || '.' || quote_ident(p_table_name); + v_relid := v_qualified_name::regclass; + + IF v_relid IS NULL THEN + RAISE EXCEPTION 'Table %.% not found', p_schema_name, p_table_name; + END IF; + + -- Get cumulative + transaction stats + v_hot_updates := COALESCE(pg_stat_get_tuples_hot_updated(v_relid), 0); + v_updates := COALESCE(pg_stat_get_tuples_updated(v_relid), 0); + v_xact_hot_updates := COALESCE(pg_stat_get_xact_tuples_hot_updated(v_relid), 0); + v_xact_updates := COALESCE(pg_stat_get_xact_tuples_updated(v_relid), 0); + + v_hot_updates := v_hot_updates + v_xact_hot_updates; + v_updates := v_updates + v_xact_updates; + + RETURN QUERY + SELECT + p_table_name::TEXT, + v_updates::BIGINT, + v_hot_updates::BIGINT, + CASE WHEN v_updates > 0 + THEN ROUND((v_hot_updates::numeric / v_updates::numeric * 100)::numeric, 2) + ELSE 0 + END, + (v_hot_updates = expected)::BOOLEAN; +END; +$$; +-- ================================================================ +-- Basic JSONB Expression Index +-- ================================================================ +CREATE TABLE t(id INT PRIMARY KEY, docs JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_docs_name_idx ON t((docs->>'name')); +INSERT INTO t VALUES (1, '{"name": "alice", "age": 30}'); +-- Update non-indexed JSONB field - should be HOT +UPDATE t SET docs = '{"name": "alice", "age": 31}' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Update indexed JSONB field - should NOT be HOT +UPDATE t SET docs = '{"name": "bob", "age": 31}' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | 
hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Update non-indexed field again - should be HOT +UPDATE t SET docs = '{"name": "bob", "age": 32}' WHERE id = 1; +SELECT * FROM check_hot_updates(2, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 2 | 66.67 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Partial Index with Predicate Transitions +-- ================================================================ +CREATE TABLE t(id INT, value INT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_value_idx ON t(value) WHERE value > 10; +INSERT INTO t VALUES (1, 5); +-- Both outside predicate - should be HOT +UPDATE t SET value = 8 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Transition into predicate - should NOT be HOT +UPDATE t SET value = 15 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Both inside predicate, value changes - should NOT be HOT +UPDATE t SET value = 20 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +-- Transition out of predicate - should NOT be HOT +UPDATE t SET value = 5 
WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 4 | 1 | 25.00 | t +(1 row) + +-- Both outside predicate again - should be HOT +UPDATE t SET value = 3 WHERE id = 1; +SELECT * FROM check_hot_updates(2, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 5 | 2 | 40.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Expression Index with Partial Predicate +-- ================================================================ +CREATE TABLE t(docs JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_idx ON t((docs->>'status')) + WHERE (docs->>'priority')::int > 5; +INSERT INTO t VALUES ('{"status": "pending", "priority": 3}'); +-- Both outside predicate, status unchanged - should be HOT +UPDATE t SET docs = '{"status": "pending", "priority": 4}'; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Transition into predicate - should NOT be HOT +UPDATE t SET docs = '{"status": "pending", "priority": 10}'; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Inside predicate, status changes - should NOT be HOT +UPDATE t SET docs = '{"status": "active", "priority": 10}'; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | 
matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +-- Inside predicate, status unchanged - should be HOT +UPDATE t SET docs = '{"status": "active", "priority": 8}'; +SELECT * FROM check_hot_updates(2, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 4 | 2 | 50.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- GIN Index on JSONB +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin_idx ON t USING gin(data); +INSERT INTO t VALUES (1, '{"tags": ["postgres", "database"]}'); +-- Change tags - GIN keys changed, should NOT be HOT +UPDATE t SET data = '{"tags": ["postgres", "sql"]}' WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 0 | 0.00 | t +(1 row) + +-- Change tags again - GIN keys changed, should NOT be HOT +UPDATE t SET data = '{"tags": ["mysql", "sql"]}' WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 0 | 0.00 | t +(1 row) + +-- Add field without changing existing keys - GIN keys changed (added "note"), NOT HOT +UPDATE t SET data = '{"tags": ["mysql", "sql"], "note": "test"}' WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected 
+------------+---------------+-------------+-----------------------+------------------ + t | 3 | 0 | 0.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- GIN Index with Unchanged Keys +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +-- Create GIN index on specific path +CREATE INDEX t_gin_idx ON t USING gin((data->'tags')); +INSERT INTO t VALUES (1, '{"tags": ["postgres", "sql"], "status": "active"}'); +-- Change non-indexed field - GIN keys on 'tags' unchanged, should be HOT +UPDATE t SET data = '{"tags": ["postgres", "sql"], "status": "inactive"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Change indexed tags - GIN keys changed, should NOT be HOT +UPDATE t SET data = '{"tags": ["mysql", "sql"], "status": "inactive"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- GIN with jsonb_path_ops +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin_idx ON t USING gin(data jsonb_path_ops); +INSERT INTO t VALUES (1, '{"user": {"name": "alice"}, "tags": ["a", "b"]}'); +-- Change value at different path - keys changed, NOT HOT +UPDATE t SET data = '{"user": {"name": "bob"}, "tags": ["a", "b"]}' WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't'); + table_name | 
total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 0 | 0.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- BRIN Index +-- ================================================================ +CREATE TABLE t(id INT, value INT, data TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +-- BRIN tracks min/max per block range +CREATE INDEX t_brin_idx ON t USING brin(value); +INSERT INTO t VALUES (1, 100, 'initial'); +-- Update non-indexed column - BRIN doesn't care, should be HOT +UPDATE t SET data = 'updated' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Update indexed column but stay in same range - should still be HOT +-- (Note: BRIN tracks ranges, not exact values) +UPDATE t SET value = 105 WHERE id = 1; +SELECT * FROM check_hot_updates(2, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 2 | 100.00 | t +(1 row) + +-- Change to significantly different value - still HOT for single row +-- (BRIN summary won't change for single-row updates in same block) +UPDATE t SET value = 1000 WHERE id = 1; +SELECT * FROM check_hot_updates(3, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 3 | 100.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Multi-Column Expression Index +-- ================================================================ +CREATE TABLE t(id 
INT, a INT, b INT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_idx ON t(id, abs(a), abs(b)); +INSERT INTO t VALUES (1, -5, -10); +-- Change sign but not abs value - should be HOT +UPDATE t SET a = 5 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Change abs value - should NOT be HOT +UPDATE t SET b = -15 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Change id - should NOT be HOT +UPDATE t SET id = 2 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Mixed Index Types (BRIN + Expression) +-- ================================================================ +CREATE TABLE t(id INT, value INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_brin_idx ON t USING brin(value); +CREATE INDEX t_expr_idx ON t((data->>'status')); +INSERT INTO t VALUES (1, 100, '{"status": "active"}'); +-- Update only BRIN column - should be HOT +UPDATE t SET value = 200 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Update only expression column - should NOT be HOT +UPDATE t SET data = 
'{"status": "inactive"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Update both - should NOT be HOT +UPDATE t SET value = 300, data = '{"status": "pending"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Expression with COLLATION +-- ================================================================ +CREATE TABLE t(id INT, name TEXT COLLATE "C") + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_lower_idx ON t(lower(name)); +INSERT INTO t VALUES (1, 'ALICE'); +-- Change case but not lowercase value - should be HOT +UPDATE t SET name = 'Alice' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Change lowercase value - should NOT be HOT +UPDATE t SET name = 'BOB' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Array Expression Index +-- ================================================================ +CREATE TABLE t(id INT, tags TEXT[]) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_array_len_idx ON 
t(array_length(tags, 1)); +INSERT INTO t VALUES (1, ARRAY['a', 'b', 'c']); +-- Same length, different elements - should be HOT +UPDATE t SET tags = ARRAY['d', 'e', 'f'] WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Different length - should NOT be HOT +UPDATE t SET tags = ARRAY['d', 'e'] WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Nested JSONB Expression +-- ================================================================ +CREATE TABLE t(data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_nested_idx ON t((data->'user'->>'name')); +INSERT INTO t VALUES ('{"user": {"name": "alice", "age": 30}}'); +-- Change nested non-indexed field - should be HOT +UPDATE t SET data = '{"user": {"name": "alice", "age": 31}}'; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Change nested indexed field - should NOT be HOT +UPDATE t SET data = '{"user": {"name": "bob", "age": 31}}'; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- Complex Predicate on 
Multiple JSONB Fields +-- ================================================================ +CREATE TABLE t(data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_idx ON t((data->>'status')) + WHERE (data->>'priority')::int > 5 + AND (data->>'active')::boolean = true; +INSERT INTO t VALUES ('{"status": "pending", "priority": 3, "active": true}'); +-- Outside predicate (priority too low) - should be HOT +UPDATE t SET data = '{"status": "done", "priority": 3, "active": true}'; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Transition into predicate - should NOT be HOT +UPDATE t SET data = '{"status": "done", "priority": 10, "active": true}'; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Inside predicate, change to outside (active = false) - should NOT be HOT +UPDATE t SET data = '{"status": "done", "priority": 10, "active": false}'; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- TOASTed Values in Expression Index +-- ================================================================ +CREATE TABLE t(id INT, large_text TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_substr_idx ON t(substr(large_text, 1, 10)); +INSERT INTO t VALUES (1, repeat('x', 5000) || 'identifier'); +-- Change end of string, prefix unchanged - should be HOT 
+UPDATE t SET large_text = repeat('x', 5000) || 'different' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Change prefix - should NOT be HOT +UPDATE t SET large_text = repeat('y', 5000) || 'different' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +DROP TABLE t; +-- ================================================================ +-- TEST: GIN with TOASTed TEXT (tsvector) +-- ================================================================ +CREATE TABLE t(id INT, content TEXT, search_vec tsvector) + WITH (autovacuum_enabled = off, fillfactor = 70); +-- Create trigger to maintain tsvector +CREATE TRIGGER tsvectorupdate_toast + BEFORE INSERT OR UPDATE ON t + FOR EACH ROW EXECUTE FUNCTION + tsvector_update_trigger(search_vec, 'pg_catalog.english', content); +CREATE INDEX t_gin ON t USING gin(search_vec); +-- Insert with large content (will be TOASTed) +INSERT INTO t (id, content) VALUES + (1, repeat('important keyword ', 1000) || repeat('filler text ', 10000)); +-- Verify initial state +SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('important'); + count +------- + 1 +(1 row) + +-- Expected: 1 row +-- IMPORTANT: The BEFORE UPDATE trigger modifies search_vec, so by the time +-- ExecWhichIndexesRequireUpdates() runs, search_vec has already changed. +-- This means the comparison sees old tsvector vs. trigger-modified tsvector, +-- not the natural progression. HOT won't happen because the trigger changed +-- the indexed column. 
+-- Update: Even though content keywords unchanged, trigger still fires +UPDATE t +SET content = repeat('important keyword ', 1000) || repeat('different filler ', 10000) +WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 0 | 0.00 | t +(1 row) + +-- Expected: 0 HOT (trigger modifies search_vec, blocking HOT) +-- This is actually correct behavior - the trigger updated an indexed column +-- Update: Change indexed keywords +UPDATE t +SET content = repeat('critical keyword ', 1000) || repeat('different filler ', 10000) +WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 0 | 0.00 | t +(1 row) + +-- Expected: 0 HOT (index keys changed) +-- Verify query correctness +SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('critical'); + count +------- + 1 +(1 row) + +-- Expected: 1 row +DROP TABLE t CASCADE; +-- ================================================================ +-- TEST: GIN with TOASTed JSONB +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin ON t USING gin((data->'tags')); +-- Insert with TOASTed JSONB +INSERT INTO t (id, data) VALUES + (1, jsonb_build_object( + 'tags', '["postgres", "database"]'::jsonb, + 'large_field', repeat('x', 10000) + )); +-- Update: Change large_field, tags unchanged - should be HOT +UPDATE t +SET data = jsonb_build_object( + 'tags', '["postgres", "database"]'::jsonb, + 'large_field', repeat('y', 10000) +) +WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | 
matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 1 | 100.00 | t +(1 row) + +-- Expected: 1 HOT update +-- Update: Change tags - should NOT be HOT +UPDATE t +SET data = jsonb_build_object( + 'tags', '["postgres", "sql"]'::jsonb, + 'large_field', repeat('y', 10000) +) +WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Expected: Still 1 HOT +-- Verify correctness +SELECT count(*) FROM t WHERE data->'tags' @> '["database"]'::jsonb; + count +------- + 0 +(1 row) + +-- Expected: 0 rows +SELECT count(*) FROM t WHERE data->'tags' @> '["sql"]'::jsonb; + count +------- + 1 +(1 row) + +-- Expected: 1 row +DROP TABLE t CASCADE; +-- ================================================================ +-- TEST: GIN with Array of Large Strings +-- ================================================================ +CREATE TABLE t(id INT, tags TEXT[]) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin ON t USING gin(tags); +-- Insert with large array elements (might be TOASTed) +INSERT INTO t (id, tags) VALUES + (1, ARRAY[repeat('tag1', 1000), repeat('tag2', 1000)]); +-- Update: Change to different large values - NOT HOT +UPDATE t +SET tags = ARRAY[repeat('tag3', 1000), repeat('tag4', 1000)] +WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 1 | 0 | 0.00 | t +(1 row) + +-- Expected: 0 HOT (keys actually changed) +-- Update: Keep same tag values, just reorder - SHOULD BE HOT +-- (GIN is order-insensitive: both [tag3,tag4] and [tag4,tag3] +-- extract to the same sorted key set ['tag3','tag4']) +UPDATE t +SET 
tags = ARRAY[repeat('tag4', 1000), repeat('tag3', 1000)] +WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 2 | 1 | 50.00 | t +(1 row) + +-- Expected: 1 HOT (GIN keys semantically identical) +-- Update: Remove an element - NOT HOT (keys changed) +UPDATE t +SET tags = ARRAY[repeat('tag4', 1000)] +WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + table_name | total_updates | hot_updates | hot_update_percentage | matches_expected +------------+---------------+-------------+-----------------------+------------------ + t | 3 | 1 | 33.33 | t +(1 row) + +-- Expected: Still 1 HOT (not this one) +DROP TABLE t CASCADE; +-- Cleanup +DROP FUNCTION check_hot_updates(int, text, text); diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index f56482fb9f1..4459625a59b 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -125,6 +125,12 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # ---------- test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa eager_aggregate + +# ---------- +# Another group of parallel tests, these focused on heap HOT updates +# ---------- +test: hot_expression_indexes + # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL # oidjoins is read-only, though, and should run late for best coverage diff --git a/src/test/regress/sql/hot_expression_indexes.sql b/src/test/regress/sql/hot_expression_indexes.sql new file mode 100644 index 00000000000..5dcadbde465 --- /dev/null +++ b/src/test/regress/sql/hot_expression_indexes.sql @@ -0,0 +1,491 @@ +-- 
================================================================
+-- Test Suite for HOT Updates with Expression and Partial Indexes
+-- ================================================================
+
+-- Setup: check_hot_updates() reports a table's update counters and whether hot_updates = expected
+CREATE OR REPLACE FUNCTION check_hot_updates(
+    expected INT,
+    p_table_name TEXT DEFAULT 't',
+    p_schema_name TEXT DEFAULT current_schema()
+)
+RETURNS TABLE (
+    table_name TEXT,
+    total_updates BIGINT,
+    hot_updates BIGINT,
+    hot_update_percentage NUMERIC,
+    matches_expected BOOLEAN
+)
+LANGUAGE plpgsql
+AS $$
+DECLARE
+    v_relid oid;
+    v_qualified_name TEXT;
+    v_hot_updates BIGINT;
+    v_updates BIGINT;
+    v_xact_hot_updates BIGINT;
+    v_xact_updates BIGINT;
+BEGIN
+    -- Force statistics update
+    PERFORM pg_stat_force_next_flush();
+
+    -- Resolve the table OID; to_regclass() returns NULL (a cast would raise) if it is missing
+    v_qualified_name := quote_ident(p_schema_name) || '.' || quote_ident(p_table_name);
+    v_relid := to_regclass(v_qualified_name);
+
+    IF v_relid IS NULL THEN
+        RAISE EXCEPTION 'Table %.% not found', p_schema_name, p_table_name;
+    END IF;
+
+    -- Get cumulative + transaction stats
+    v_hot_updates := COALESCE(pg_stat_get_tuples_hot_updated(v_relid), 0);
+    v_updates := COALESCE(pg_stat_get_tuples_updated(v_relid), 0);
+    v_xact_hot_updates := COALESCE(pg_stat_get_xact_tuples_hot_updated(v_relid), 0);
+    v_xact_updates := COALESCE(pg_stat_get_xact_tuples_updated(v_relid), 0);
+
+    v_hot_updates := v_hot_updates + v_xact_hot_updates;
+    v_updates := v_updates + v_xact_updates;
+
+    RETURN QUERY
+    SELECT
+        p_table_name::TEXT,
+        v_updates::BIGINT,
+        v_hot_updates::BIGINT,
+        CASE WHEN v_updates > 0
+            THEN ROUND((v_hot_updates::numeric / v_updates::numeric * 100)::numeric, 2)
+            ELSE 0
+        END,
+        (v_hot_updates = expected)::BOOLEAN;
+END;
+$$;
+
+-- ================================================================
+-- Basic JSONB Expression Index
+-- ================================================================
+CREATE TABLE t(id INT PRIMARY KEY, docs JSONB)
+    WITH (autovacuum_enabled =
off, fillfactor = 70); +CREATE INDEX t_docs_name_idx ON t((docs->>'name')); +INSERT INTO t VALUES (1, '{"name": "alice", "age": 30}'); + +-- Update non-indexed JSONB field - should be HOT +UPDATE t SET docs = '{"name": "alice", "age": 31}' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Update indexed JSONB field - should NOT be HOT +UPDATE t SET docs = '{"name": "bob", "age": 31}' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Update non-indexed field again - should be HOT +UPDATE t SET docs = '{"name": "bob", "age": 32}' WHERE id = 1; +SELECT * FROM check_hot_updates(2, 't'); + +DROP TABLE t; + +-- ================================================================ +-- Partial Index with Predicate Transitions +-- ================================================================ +CREATE TABLE t(id INT, value INT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_value_idx ON t(value) WHERE value > 10; +INSERT INTO t VALUES (1, 5); + +-- Both outside predicate - should be HOT +UPDATE t SET value = 8 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Transition into predicate - should NOT be HOT +UPDATE t SET value = 15 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Both inside predicate, value changes - should NOT be HOT +UPDATE t SET value = 20 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Transition out of predicate - should NOT be HOT +UPDATE t SET value = 5 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Both outside predicate again - should be HOT +UPDATE t SET value = 3 WHERE id = 1; +SELECT * FROM check_hot_updates(2, 't'); + +DROP TABLE t; + +-- ================================================================ +-- Expression Index with Partial Predicate +-- ================================================================ +CREATE TABLE t(docs JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_idx ON t((docs->>'status')) + WHERE 
(docs->>'priority')::int > 5; +INSERT INTO t VALUES ('{"status": "pending", "priority": 3}'); + +-- Both outside predicate, status unchanged - should be HOT +UPDATE t SET docs = '{"status": "pending", "priority": 4}'; +SELECT * FROM check_hot_updates(1, 't'); + +-- Transition into predicate - should NOT be HOT +UPDATE t SET docs = '{"status": "pending", "priority": 10}'; +SELECT * FROM check_hot_updates(1, 't'); + +-- Inside predicate, status changes - should NOT be HOT +UPDATE t SET docs = '{"status": "active", "priority": 10}'; +SELECT * FROM check_hot_updates(1, 't'); + +-- Inside predicate, status unchanged - should be HOT +UPDATE t SET docs = '{"status": "active", "priority": 8}'; +SELECT * FROM check_hot_updates(2, 't'); + +DROP TABLE t; + +-- ================================================================ +-- GIN Index on JSONB +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin_idx ON t USING gin(data); +INSERT INTO t VALUES (1, '{"tags": ["postgres", "database"]}'); + +-- Change tags - GIN keys changed, should NOT be HOT +UPDATE t SET data = '{"tags": ["postgres", "sql"]}' WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't'); + +-- Change tags again - GIN keys changed, should NOT be HOT +UPDATE t SET data = '{"tags": ["mysql", "sql"]}' WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't'); + +-- Add field without changing existing keys - GIN keys changed (added "note"), NOT HOT +UPDATE t SET data = '{"tags": ["mysql", "sql"], "note": "test"}' WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't'); + +DROP TABLE t; + +-- ================================================================ +-- GIN Index with Unchanged Keys +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +-- Create GIN index on specific path +CREATE INDEX 
t_gin_idx ON t USING gin((data->'tags')); +INSERT INTO t VALUES (1, '{"tags": ["postgres", "sql"], "status": "active"}'); + +-- Change non-indexed field - GIN keys on 'tags' unchanged, should be HOT +UPDATE t SET data = '{"tags": ["postgres", "sql"], "status": "inactive"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Change indexed tags - GIN keys changed, should NOT be HOT +UPDATE t SET data = '{"tags": ["mysql", "sql"], "status": "inactive"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +DROP TABLE t; + +-- ================================================================ +-- GIN with jsonb_path_ops +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin_idx ON t USING gin(data jsonb_path_ops); +INSERT INTO t VALUES (1, '{"user": {"name": "alice"}, "tags": ["a", "b"]}'); + +-- Change value at different path - keys changed, NOT HOT +UPDATE t SET data = '{"user": {"name": "bob"}, "tags": ["a", "b"]}' WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't'); + +DROP TABLE t; + +-- ================================================================ +-- BRIN Index +-- ================================================================ +CREATE TABLE t(id INT, value INT, data TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +-- BRIN tracks min/max per block range +CREATE INDEX t_brin_idx ON t USING brin(value); +INSERT INTO t VALUES (1, 100, 'initial'); + +-- Update non-indexed column - BRIN doesn't care, should be HOT +UPDATE t SET data = 'updated' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Update the BRIN-indexed column - should still be HOT +-- (Note: BRIN is a summarizing index, which never blocks HOT) +UPDATE t SET value = 105 WHERE id = 1; +SELECT * FROM check_hot_updates(2, 't'); + +-- Change to significantly different value - still HOT for single row +-- (BRIN is summarizing, so HOT is not blocked for single-row 
updates in same block) +UPDATE t SET value = 1000 WHERE id = 1; +SELECT * FROM check_hot_updates(3, 't'); + +DROP TABLE t; + +-- ================================================================ +-- Multi-Column Expression Index +-- ================================================================ +CREATE TABLE t(id INT, a INT, b INT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_idx ON t(id, abs(a), abs(b)); +INSERT INTO t VALUES (1, -5, -10); + +-- Change sign but not abs value - should be HOT +UPDATE t SET a = 5 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Change abs value - should NOT be HOT +UPDATE t SET b = -15 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Change id - should NOT be HOT +UPDATE t SET id = 2 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +DROP TABLE t; + +-- ================================================================ +-- Mixed Index Types (BRIN + Expression) +-- ================================================================ +CREATE TABLE t(id INT, value INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_brin_idx ON t USING brin(value); +CREATE INDEX t_expr_idx ON t((data->>'status')); +INSERT INTO t VALUES (1, 100, '{"status": "active"}'); + +-- Update only BRIN column - should be HOT +UPDATE t SET value = 200 WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Update only expression column - should NOT be HOT +UPDATE t SET data = '{"status": "inactive"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Update both - should NOT be HOT +UPDATE t SET value = 300, data = '{"status": "pending"}' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +DROP TABLE t; + +-- ================================================================ +-- Expression with COLLATION +-- ================================================================ +CREATE TABLE t(id INT, name TEXT COLLATE "C") + WITH (autovacuum_enabled = off, 
fillfactor = 70); +CREATE INDEX t_lower_idx ON t(lower(name)); +INSERT INTO t VALUES (1, 'ALICE'); + +-- Change case but not lowercase value - should be HOT +UPDATE t SET name = 'Alice' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Change lowercase value - should NOT be HOT +UPDATE t SET name = 'BOB' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +DROP TABLE t; + +-- ================================================================ +-- Array Expression Index +-- ================================================================ +CREATE TABLE t(id INT, tags TEXT[]) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_array_len_idx ON t(array_length(tags, 1)); +INSERT INTO t VALUES (1, ARRAY['a', 'b', 'c']); + +-- Same length, different elements - should be HOT +UPDATE t SET tags = ARRAY['d', 'e', 'f'] WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Different length - should NOT be HOT +UPDATE t SET tags = ARRAY['d', 'e'] WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +DROP TABLE t; + +-- ================================================================ +-- Nested JSONB Expression +-- ================================================================ +CREATE TABLE t(data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_nested_idx ON t((data->'user'->>'name')); +INSERT INTO t VALUES ('{"user": {"name": "alice", "age": 30}}'); + +-- Change nested non-indexed field - should be HOT +UPDATE t SET data = '{"user": {"name": "alice", "age": 31}}'; +SELECT * FROM check_hot_updates(1, 't'); + +-- Change nested indexed field - should NOT be HOT +UPDATE t SET data = '{"user": {"name": "bob", "age": 31}}'; +SELECT * FROM check_hot_updates(1, 't'); + +DROP TABLE t; + +-- ================================================================ +-- Complex Predicate on Multiple JSONB Fields +-- ================================================================ +CREATE TABLE t(data JSONB) + WITH 
(autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_idx ON t((data->>'status')) + WHERE (data->>'priority')::int > 5 + AND (data->>'active')::boolean = true; + +INSERT INTO t VALUES ('{"status": "pending", "priority": 3, "active": true}'); + +-- Outside predicate (priority too low) - should be HOT +UPDATE t SET data = '{"status": "done", "priority": 3, "active": true}'; +SELECT * FROM check_hot_updates(1, 't'); + +-- Transition into predicate - should NOT be HOT +UPDATE t SET data = '{"status": "done", "priority": 10, "active": true}'; +SELECT * FROM check_hot_updates(1, 't'); + +-- Inside predicate, change to outside (active = false) - should NOT be HOT +UPDATE t SET data = '{"status": "done", "priority": 10, "active": false}'; +SELECT * FROM check_hot_updates(1, 't'); + +DROP TABLE t; + +-- ================================================================ +-- TOASTed Values in Expression Index +-- ================================================================ +CREATE TABLE t(id INT, large_text TEXT) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_substr_idx ON t(substr(large_text, 1, 10)); + +INSERT INTO t VALUES (1, repeat('x', 5000) || 'identifier'); + +-- Change end of string, prefix unchanged - should be HOT +UPDATE t SET large_text = repeat('x', 5000) || 'different' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +-- Change prefix - should NOT be HOT +UPDATE t SET large_text = repeat('y', 5000) || 'different' WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); + +DROP TABLE t; + +-- ================================================================ +-- TEST: GIN with TOASTed TEXT (tsvector) +-- ================================================================ +CREATE TABLE t(id INT, content TEXT, search_vec tsvector) + WITH (autovacuum_enabled = off, fillfactor = 70); + +-- Create trigger to maintain tsvector +CREATE TRIGGER tsvectorupdate_toast + BEFORE INSERT OR UPDATE ON t + FOR EACH ROW EXECUTE FUNCTION + 
tsvector_update_trigger(search_vec, 'pg_catalog.english', content); + +CREATE INDEX t_gin ON t USING gin(search_vec); + +-- Insert with large content (will be TOASTed) +INSERT INTO t (id, content) VALUES + (1, repeat('important keyword ', 1000) || repeat('filler text ', 10000)); + +-- Verify initial state +SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('important'); +-- Expected: 1 row + +-- IMPORTANT: The BEFORE UPDATE trigger modifies search_vec, so by the time +-- ExecWhichIndexesRequireUpdates() runs, search_vec has already changed. +-- This means the comparison sees old tsvector vs. trigger-modified tsvector, +-- not the natural progression. HOT won't happen because the trigger changed +-- the indexed column. + +-- Update: Even though content keywords unchanged, trigger still fires +UPDATE t +SET content = repeat('important keyword ', 1000) || repeat('different filler ', 10000) +WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't'); +-- Expected: 0 HOT (trigger modifies search_vec, blocking HOT) +-- This is actually correct behavior - the trigger updated an indexed column + +-- Update: Change indexed keywords +UPDATE t +SET content = repeat('critical keyword ', 1000) || repeat('different filler ', 10000) +WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't'); +-- Expected: 0 HOT (index keys changed) + +-- Verify query correctness +SELECT count(*) FROM t WHERE search_vec @@ to_tsquery('critical'); +-- Expected: 1 row + +DROP TABLE t CASCADE; + +-- ================================================================ +-- TEST: GIN with TOASTed JSONB +-- ================================================================ +CREATE TABLE t(id INT, data JSONB) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin ON t USING gin((data->'tags')); + +-- Insert with TOASTed JSONB +INSERT INTO t (id, data) VALUES + (1, jsonb_build_object( + 'tags', '["postgres", "database"]'::jsonb, + 'large_field', repeat('x', 10000) + )); + +-- Update: Change 
large_field, tags unchanged - should be HOT +UPDATE t +SET data = jsonb_build_object( + 'tags', '["postgres", "database"]'::jsonb, + 'large_field', repeat('y', 10000) +) +WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); +-- Expected: 1 HOT update + +-- Update: Change tags - should NOT be HOT +UPDATE t +SET data = jsonb_build_object( + 'tags', '["postgres", "sql"]'::jsonb, + 'large_field', repeat('y', 10000) +) +WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); +-- Expected: Still 1 HOT + +-- Verify correctness +SELECT count(*) FROM t WHERE data->'tags' @> '["database"]'::jsonb; +-- Expected: 0 rows +SELECT count(*) FROM t WHERE data->'tags' @> '["sql"]'::jsonb; +-- Expected: 1 row + +DROP TABLE t CASCADE; + +-- ================================================================ +-- TEST: GIN with Array of Large Strings +-- ================================================================ +CREATE TABLE t(id INT, tags TEXT[]) + WITH (autovacuum_enabled = off, fillfactor = 70); +CREATE INDEX t_gin ON t USING gin(tags); + +-- Insert with large array elements (might be TOASTed) +INSERT INTO t (id, tags) VALUES + (1, ARRAY[repeat('tag1', 1000), repeat('tag2', 1000)]); + +-- Update: Change to different large values - NOT HOT +UPDATE t +SET tags = ARRAY[repeat('tag3', 1000), repeat('tag4', 1000)] +WHERE id = 1; +SELECT * FROM check_hot_updates(0, 't'); +-- Expected: 0 HOT (keys actually changed) + +-- Update: Keep same tag values, just reorder - SHOULD BE HOT +-- (GIN is order-insensitive: both [tag3,tag4] and [tag4,tag3] +-- extract to the same sorted key set ['tag3','tag4']) +UPDATE t +SET tags = ARRAY[repeat('tag4', 1000), repeat('tag3', 1000)] +WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); +-- Expected: 1 HOT (GIN keys semantically identical) + +-- Update: Remove an element - NOT HOT (keys changed) +UPDATE t +SET tags = ARRAY[repeat('tag4', 1000)] +WHERE id = 1; +SELECT * FROM check_hot_updates(1, 't'); +-- Expected: Still 1 HOT (not this one) + 
+DROP TABLE t CASCADE; + +-- Cleanup +DROP FUNCTION check_hot_updates(int, text, text); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 23bce72ae64..52ef8f10b35 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -390,6 +390,7 @@ CachedFunctionCompileCallback CachedFunctionDeleteCallback CachedFunctionHashEntry CachedFunctionHashKey +CachedIndexDatum CachedPlan CachedPlanSource CallContext -- 2.49.0