From a341e85698df4d8f6d4d9bd9206346e0592b9a00 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Tue, 26 Mar 2024 21:00:37 +0200 Subject: [PATCH 8/8] Introduce RowID -- bytea tuple identifier Currently, there are two ways to reference a tuple: the tuple identifier (tid) and a whole-row copy. The tuple identifier used for regular tables consists of a 32-bit block number and a 16-bit offset. This seems too limited for some use cases, in particular index-organized tables. The whole-row copy is used to identify tuples in FDWs. That could be extended to regular tables, but that seems like overkill. This commit introduces RowID -- a new bytea tuple identifier. A table AM can choose how tuples are identified by providing the new get_row_ref_type() API function. The new system attribute RowIdAttributeNumber holds the RowID when appropriate. Table AM methods now accept Datum arguments as tuple identifiers. Those Datums can be either a tid or a bytea, depending on what table_get_row_ref_type() returns. The ModifyTable node and triggers are aware of RowIDs. The IndexScan and BitmapScan nodes are not aware of RowIDs and expect tids. Table AMs that use RowIDs are supposed to redefine those nodes using hooks. 
--- contrib/amcheck/verify_nbtree.c | 3 +- src/backend/access/common/heaptuple.c | 4 + src/backend/access/heap/heapam_handler.c | 33 ++- src/backend/access/table/tableam.c | 4 +- src/backend/catalog/aclchk.c | 2 +- src/backend/commands/trigger.c | 251 ++++++++++++++++++----- src/backend/executor/execExprInterp.c | 4 +- src/backend/executor/execMain.c | 9 +- src/backend/executor/execReplication.c | 12 +- src/backend/executor/nodeLockRows.c | 17 +- src/backend/executor/nodeModifyTable.c | 145 ++++++++----- src/backend/executor/nodeTidscan.c | 2 +- src/backend/optimizer/plan/planner.c | 11 +- src/backend/optimizer/prep/preptlist.c | 16 ++ src/backend/optimizer/util/appendinfo.c | 33 ++- src/backend/optimizer/util/inherit.c | 20 ++ src/backend/parser/parse_relation.c | 13 ++ src/backend/rewrite/rewriteHandler.c | 1 + src/backend/utils/sort/tuplestore.c | 30 +++ src/include/access/sysattr.h | 3 +- src/include/access/tableam.h | 58 ++++-- src/include/commands/trigger.h | 4 +- src/include/nodes/parsenodes.h | 2 + src/include/nodes/plannodes.h | 21 -- src/include/nodes/primnodes.h | 22 ++ src/include/utils/tuplestore.h | 3 + 26 files changed, 548 insertions(+), 175 deletions(-) diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c index f71f1854e0a..7bfa2a2fc44 100644 --- a/contrib/amcheck/verify_nbtree.c +++ b/contrib/amcheck/verify_nbtree.c @@ -984,7 +984,8 @@ heap_entry_is_visible(BtreeCheckState *state, ItemPointer tid) TupleTableSlot *slot = table_slot_create(state->heaprel, NULL); tid_visible = table_tuple_fetch_row_version(state->heaprel, - tid, state->snapshot, slot); + PointerGetDatum(tid), + state->snapshot, slot); if (slot != NULL) ExecDropSingleTupleTableSlot(slot); diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c index 5c89fbbef83..7b52c66939c 100644 --- a/src/backend/access/common/heaptuple.c +++ b/src/backend/access/common/heaptuple.c @@ -755,6 +755,10 @@ heap_getsysattr(HeapTuple tup, int 
attnum, TupleDesc tupleDesc, bool *isnull) case TableOidAttributeNumber: result = ObjectIdGetDatum(tup->t_tableOid); break; + case RowIdAttributeNumber: + *isnull = true; + result = 0; + break; default: elog(ERROR, "invalid attnum: %d", attnum); result = 0; /* keep compiler quiet */ diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 8ddb90e7ce1..6a1bd3ae476 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -50,7 +50,7 @@ #include "utils/sampling.h" #include "utils/spccache.h" -static TM_Result heapam_tuple_lock(Relation relation, ItemPointer tid, +static TM_Result heapam_tuple_lock(Relation relation, Datum tupleid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, @@ -194,7 +194,7 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan, static bool heapam_fetch_row_version(Relation relation, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot) { @@ -203,7 +203,7 @@ heapam_fetch_row_version(Relation relation, Assert(TTS_IS_BUFFERTUPLE(slot)); - bslot->base.tupdata.t_self = *tid; + bslot->base.tupdata.t_self = *DatumGetItemPointer(tupleid); if (heap_fetch(relation, snapshot, &bslot->base.tupdata, &buffer, false)) { /* store in slot, transferring existing pin */ @@ -368,7 +368,7 @@ ExecCheckTIDVisible(EState *estate, if (!IsolationUsesXactSnapshot()) return; - if (!table_tuple_fetch_row_version(rel, tid, + if (!table_tuple_fetch_row_version(rel, PointerGetDatum(tid), SnapshotAny, tempSlot)) elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT"); ExecCheckTupleVisible(estate, rel, tempSlot); @@ -415,7 +415,7 @@ heapam_tuple_insert_with_arbiter(ResultRelInfo *resultRelInfo, * here means our previous conclusion that the tuple is * conclusively committed is not true anymore. 
*/ - test = table_tuple_lock(rel, &conflictTid, + test = table_tuple_lock(rel, PointerGetDatum(&conflictTid), estate->es_snapshot, lockedSlot, estate->es_output_cid, lockmode, LockWaitBlock, 0, @@ -595,12 +595,13 @@ heapam_tuple_insert_with_arbiter(ResultRelInfo *resultRelInfo, } static TM_Result -heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, +heapam_tuple_delete(Relation relation, Datum tupleid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, int options, TM_FailureData *tmfd, bool changingPart, TupleTableSlot *oldSlot) { TM_Result result; + ItemPointer tid = DatumGetItemPointer(tupleid); /* * Currently Deleting of index tuples are handled at vacuum, in case if @@ -623,7 +624,7 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, * heapam_tuple_lock() will take advantage of tuple loaded into * oldSlot by heap_delete(). */ - result = heapam_tuple_lock(relation, tid, snapshot, + result = heapam_tuple_lock(relation, tupleid, snapshot, oldSlot, cid, LockTupleExclusive, (options & TABLE_MODIFY_WAIT) ? LockWaitBlock : @@ -640,7 +641,7 @@ heapam_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, static TM_Result -heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, +heapam_tuple_update(Relation relation, Datum tupleid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, int options, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes, @@ -648,6 +649,7 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, { bool shouldFree = true; HeapTuple tuple = ExecFetchSlotHeapTuple(slot, true, &shouldFree); + ItemPointer otid = DatumGetItemPointer(tupleid); TM_Result result; /* Update the tuple with table oid */ @@ -695,7 +697,7 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, * heapam_tuple_lock() will take advantage of tuple loaded into * oldSlot by heap_update(). 
*/ - result = heapam_tuple_lock(relation, otid, snapshot, + result = heapam_tuple_lock(relation, tupleid, snapshot, oldSlot, cid, *lockmode, (options & TABLE_MODIFY_WAIT) ? LockWaitBlock : @@ -711,7 +713,7 @@ heapam_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, } static TM_Result -heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, +heapam_tuple_lock(Relation relation, Datum tupleid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, TM_FailureData *tmfd) @@ -719,6 +721,7 @@ heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; TM_Result result; HeapTuple tuple = &bslot->base.tupdata; + ItemPointer tid = DatumGetItemPointer(tupleid); bool follow_updates; follow_updates = (flags & TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS) != 0; @@ -2657,6 +2660,15 @@ heapam_scan_get_blocks_done(HeapScanDesc hscan) * ------------------------------------------------------------------------ */ +/* + * All heap tables use TID row identifier. + */ +static RowRefType +heapam_get_row_ref_type(Relation rel) +{ + return ROW_REF_TID; +} + /* * Check to see whether the table needs a TOAST table. 
It does only if * (1) there are any toastable attributes, and (2) the maximum length @@ -3235,6 +3247,7 @@ static const TableAmRoutine heapam_methods = { .define_index_validate = NULL, .define_index = NULL, + .get_row_ref_type = heapam_get_row_ref_type, .free_rd_amcache = NULL, .relation_size = table_block_relation_size, .relation_needs_toast_table = heapam_relation_needs_toast_table, diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 805d222cebc..caa79c6eddd 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -300,7 +300,7 @@ simple_table_tuple_delete(Relation rel, ItemPointer tid, Snapshot snapshot, if (oldSlot) options |= TABLE_MODIFY_FETCH_OLD_TUPLE; - result = table_tuple_delete(rel, tid, + result = table_tuple_delete(rel, PointerGetDatum(tid), GetCurrentCommandId(true), snapshot, InvalidSnapshot, options, @@ -356,7 +356,7 @@ simple_table_tuple_update(Relation rel, ItemPointer otid, if (oldSlot) options |= TABLE_MODIFY_FETCH_OLD_TUPLE; - result = table_tuple_update(rel, otid, slot, + result = table_tuple_update(rel, PointerGetDatum(otid), slot, GetCurrentCommandId(true), snapshot, InvalidSnapshot, options, diff --git a/src/backend/catalog/aclchk.c b/src/backend/catalog/aclchk.c index 7abf3c2a74a..8765becf986 100644 --- a/src/backend/catalog/aclchk.c +++ b/src/backend/catalog/aclchk.c @@ -1626,7 +1626,7 @@ expand_all_col_privileges(Oid table_oid, Form_pg_class classForm, AttrNumber curr_att; Assert(classForm->relnatts - FirstLowInvalidHeapAttributeNumber < num_col_privileges); - for (curr_att = FirstLowInvalidHeapAttributeNumber + 1; + for (curr_att = FirstLowInvalidHeapAttributeNumber + 2; curr_att <= classForm->relnatts; curr_att++) { diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 3309b4ebd2d..b2248bdfd87 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -76,7 +76,7 @@ static void SetTriggerFlags(TriggerDesc 
*trigdesc, Trigger *trigger); static bool GetTupleForTrigger(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tid, + Datum tupleid, LockTupleMode lockmode, TupleTableSlot *oldslot, TupleTableSlot **epqslot, @@ -2682,7 +2682,7 @@ ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo, bool ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple fdw_trigtuple, TupleTableSlot **epqslot, TM_Result *tmresult, @@ -2696,7 +2696,7 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, bool should_free = false; int i; - Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); + Assert(HeapTupleIsValid(fdw_trigtuple) ^ (DatumGetPointer(tupleid) != NULL)); if (fdw_trigtuple == NULL) { TupleTableSlot *epqslot_candidate = NULL; @@ -2924,7 +2924,7 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo, bool ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple fdw_trigtuple, TupleTableSlot *newslot, TM_Result *tmresult, @@ -2944,7 +2944,7 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, /* Determine lock mode to use */ lockmode = ExecUpdateLockMode(estate, relinfo); - Assert(HeapTupleIsValid(fdw_trigtuple) ^ ItemPointerIsValid(tupleid)); + Assert(HeapTupleIsValid(fdw_trigtuple) ^ (DatumGetPointer(tupleid) != NULL)); if (fdw_trigtuple == NULL) { TupleTableSlot *epqslot_candidate = NULL; @@ -3261,7 +3261,7 @@ static bool GetTupleForTrigger(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tid, + Datum tupleid, LockTupleMode lockmode, TupleTableSlot *oldslot, TupleTableSlot **epqslot, @@ -3286,7 +3286,9 @@ GetTupleForTrigger(EState *estate, */ if (!IsolationUsesXactSnapshot()) lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION; - test = table_tuple_lock(relation, tid, estate->es_snapshot, oldslot, + + test = table_tuple_lock(relation, tupleid, + 
estate->es_snapshot, oldslot, estate->es_output_cid, lockmode, LockWaitBlock, lockflags, @@ -3382,8 +3384,8 @@ GetTupleForTrigger(EState *estate, * We expect the tuple to be present, thus very simple error handling * suffices. */ - if (!table_tuple_fetch_row_version(relation, tid, SnapshotAny, - oldslot)) + if (!table_tuple_fetch_row_version(relation, tupleid, + SnapshotAny, oldslot)) elog(ERROR, "failed to fetch tuple for trigger"); } @@ -3589,18 +3591,24 @@ typedef SetConstraintStateData *SetConstraintState; * cycles. So we need only ensure that ats_firing_id is zero when attaching * a new event to an existing AfterTriggerSharedData record. */ -typedef uint32 TriggerFlags; - -#define AFTER_TRIGGER_OFFSET 0x07FFFFFF /* must be low-order bits */ -#define AFTER_TRIGGER_DONE 0x80000000 -#define AFTER_TRIGGER_IN_PROGRESS 0x40000000 +typedef uint64 TriggerFlags; + +#define AFTER_TRIGGER_SIZE UINT64CONST(0xFFFF000000000) /* event size, bits + * 36..51 */ +#define AFTER_TRIGGER_SIZE_SHIFT (36) +#define AFTER_TRIGGER_OFFSET UINT64CONST(0x000000FFFFFFF) /* must be low-order + * bits */ +#define AFTER_TRIGGER_DONE UINT64CONST(0x0000800000000) +#define AFTER_TRIGGER_IN_PROGRESS UINT64CONST(0x0000400000000) /* bits describing the size and tuple sources of this event */ -#define AFTER_TRIGGER_FDW_REUSE 0x00000000 -#define AFTER_TRIGGER_FDW_FETCH 0x20000000 -#define AFTER_TRIGGER_1CTID 0x10000000 -#define AFTER_TRIGGER_2CTID 0x30000000 -#define AFTER_TRIGGER_CP_UPDATE 0x08000000 -#define AFTER_TRIGGER_TUP_BITS 0x38000000 +#define AFTER_TRIGGER_FDW_REUSE UINT64CONST(0x0000000000000) +#define AFTER_TRIGGER_FDW_FETCH UINT64CONST(0x0000200000000) +#define AFTER_TRIGGER_1CTID UINT64CONST(0x0000100000000) +#define AFTER_TRIGGER_ROWID1 UINT64CONST(0x0000010000000) +#define AFTER_TRIGGER_2CTID UINT64CONST(0x0000300000000) +#define AFTER_TRIGGER_ROWID2 UINT64CONST(0x0000020000000) +#define AFTER_TRIGGER_CP_UPDATE UINT64CONST(0x0000080000000) +#define AFTER_TRIGGER_TUP_BITS 
UINT64CONST(0x0000380000000) typedef struct AfterTriggerSharedData *AfterTriggerShared; typedef struct AfterTriggerSharedData @@ -3652,6 +3660,9 @@ typedef struct AfterTriggerEventDataZeroCtids } AfterTriggerEventDataZeroCtids; #define SizeofTriggerEvent(evt) \ + (((evt)->ate_flags & AFTER_TRIGGER_SIZE) >> AFTER_TRIGGER_SIZE_SHIFT) + +#define BasicSizeofTriggerEvent(evt) \ (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_CP_UPDATE ? \ sizeof(AfterTriggerEventData) : \ (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \ @@ -4004,14 +4015,34 @@ afterTriggerCopyBitmap(Bitmapset *src) */ static void afterTriggerAddEvent(AfterTriggerEventList *events, - AfterTriggerEvent event, AfterTriggerShared evtshared) + AfterTriggerEvent event, AfterTriggerShared evtshared, + bytea *rowid1, bytea *rowid2) { - Size eventsize = SizeofTriggerEvent(event); - Size needed = eventsize + sizeof(AfterTriggerSharedData); + Size basiceventsize = MAXALIGN(BasicSizeofTriggerEvent(event)); + Size eventsize; + Size needed; AfterTriggerEventChunk *chunk; AfterTriggerShared newshared; AfterTriggerEvent newevent; + if (SizeofTriggerEvent(event) == 0) + { + eventsize = basiceventsize; + if (event->ate_flags & AFTER_TRIGGER_ROWID1) + eventsize += MAXALIGN(VARSIZE(rowid1)); + + if (event->ate_flags & AFTER_TRIGGER_ROWID2) + eventsize += MAXALIGN(VARSIZE(rowid2)); + + event->ate_flags |= eventsize << AFTER_TRIGGER_SIZE_SHIFT; + } + else + { + eventsize = SizeofTriggerEvent(event); + } + + needed = eventsize + sizeof(AfterTriggerSharedData); + /* * If empty list or not enough room in the tail chunk, make a new chunk. * We assume here that a new shared record will always be needed. @@ -4044,7 +4075,7 @@ afterTriggerAddEvent(AfterTriggerEventList *events, * sizes used should be MAXALIGN multiples, to ensure that the shared * records will be aligned safely. 
*/ -#define MIN_CHUNK_SIZE 1024 +#define MIN_CHUNK_SIZE (1024*4) #define MAX_CHUNK_SIZE (1024*1024) #if MAX_CHUNK_SIZE > (AFTER_TRIGGER_OFFSET+1) @@ -4063,6 +4094,7 @@ afterTriggerAddEvent(AfterTriggerEventList *events, chunksize *= 2; /* okay, double it */ else chunksize /= 2; /* too many shared records */ + chunksize = Max(chunksize, MIN_CHUNK_SIZE); chunksize = Min(chunksize, MAX_CHUNK_SIZE); } chunk = MemoryContextAlloc(afterTriggers.event_cxt, chunksize); @@ -4103,7 +4135,26 @@ afterTriggerAddEvent(AfterTriggerEventList *events, /* Insert the data */ newevent = (AfterTriggerEvent) chunk->freeptr; - memcpy(newevent, event, eventsize); + if (!rowid1 && !rowid2) + { + memcpy(newevent, event, eventsize); + } + else + { + Pointer ptr = chunk->freeptr; + + memcpy(newevent, event, basiceventsize); + ptr += basiceventsize; + + if (event->ate_flags & AFTER_TRIGGER_ROWID1) + { + memcpy(ptr, rowid1, MAXALIGN(VARSIZE(rowid1))); + ptr += MAXALIGN(VARSIZE(rowid1)); + } + + if (event->ate_flags & AFTER_TRIGGER_ROWID2) + memcpy(ptr, rowid2, MAXALIGN(VARSIZE(rowid2))); + } /* ... and link the new event to its shared record */ newevent->ate_flags &= ~AFTER_TRIGGER_OFFSET; newevent->ate_flags |= (char *) newshared - (char *) newevent; @@ -4263,6 +4314,7 @@ AfterTriggerExecute(EState *estate, int tgindx; bool should_free_trig = false; bool should_free_new = false; + Pointer ptr; /* * Locate trigger in trigdesc. 
@@ -4294,15 +4346,17 @@ AfterTriggerExecute(EState *estate, { Tuplestorestate *fdw_tuplestore = GetCurrentFDWTuplestore(); - if (!tuplestore_gettupleslot(fdw_tuplestore, true, false, - trig_tuple_slot1)) + if (!tuplestore_force_gettupleslot(fdw_tuplestore, true, false, + trig_tuple_slot1)) elog(ERROR, "failed to fetch tuple1 for AFTER trigger"); if ((evtshared->ats_event & TRIGGER_EVENT_OPMASK) == TRIGGER_EVENT_UPDATE && - !tuplestore_gettupleslot(fdw_tuplestore, true, false, - trig_tuple_slot2)) + !tuplestore_force_gettupleslot(fdw_tuplestore, true, false, + trig_tuple_slot2)) elog(ERROR, "failed to fetch tuple2 for AFTER trigger"); + trig_tuple_slot1->tts_tid = event->ate_ctid1; + trig_tuple_slot2->tts_tid = event->ate_ctid2; } /* fall through */ case AFTER_TRIGGER_FDW_REUSE: @@ -4334,13 +4388,26 @@ AfterTriggerExecute(EState *estate, break; default: - if (ItemPointerIsValid(&(event->ate_ctid1))) + ptr = (Pointer) event + MAXALIGN(BasicSizeofTriggerEvent(event)); + if (ItemPointerIsValid(&(event->ate_ctid1)) || + (event->ate_flags & AFTER_TRIGGER_ROWID1)) { + Datum tupleid; + TupleTableSlot *src_slot = ExecGetTriggerOldSlot(estate, src_relInfo); - if (!table_tuple_fetch_row_version(src_rel, - &(event->ate_ctid1), + if (event->ate_flags & AFTER_TRIGGER_ROWID1) + { + tupleid = PointerGetDatum(ptr); + ptr += MAXALIGN(VARSIZE(ptr)); + } + else + { + tupleid = PointerGetDatum(&(event->ate_ctid1)); + } + + if (!table_tuple_fetch_row_version(src_rel, tupleid, SnapshotAny, src_slot)) elog(ERROR, "failed to fetch tuple1 for AFTER trigger"); @@ -4376,13 +4443,23 @@ AfterTriggerExecute(EState *estate, /* don't touch ctid2 if not there */ if (((event->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID || (event->ate_flags & AFTER_TRIGGER_CP_UPDATE)) && - ItemPointerIsValid(&(event->ate_ctid2))) + (ItemPointerIsValid(&(event->ate_ctid2)) || + (event->ate_flags & AFTER_TRIGGER_ROWID2))) { + Datum tupleid; + TupleTableSlot *dst_slot = ExecGetTriggerNewSlot(estate, 
dst_relInfo); - if (!table_tuple_fetch_row_version(dst_rel, - &(event->ate_ctid2), + if (event->ate_flags & AFTER_TRIGGER_ROWID2) + { + tupleid = PointerGetDatum(ptr); + } + else + { + tupleid = PointerGetDatum(&(event->ate_ctid2)); + } + if (!table_tuple_fetch_row_version(dst_rel, tupleid, SnapshotAny, dst_slot)) elog(ERROR, "failed to fetch tuple2 for AFTER trigger"); @@ -4556,7 +4633,7 @@ afterTriggerMarkEvents(AfterTriggerEventList *events, { deferred_found = true; /* add it to move_list */ - afterTriggerAddEvent(move_list, event, evtshared); + afterTriggerAddEvent(move_list, event, evtshared, NULL, NULL); /* mark original copy "done" so we don't do it again */ event->ate_flags |= AFTER_TRIGGER_DONE; } @@ -4659,6 +4736,7 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events, trigdesc = rInfo->ri_TrigDesc; finfo = rInfo->ri_TrigFunctions; instr = rInfo->ri_TrigInstrument; + if (slot1 != NULL) { ExecDropSingleTupleTableSlot(slot1); @@ -6051,6 +6129,8 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, int tgtype_level; int i; Tuplestorestate *fdw_tuplestore = NULL; + bytea *rowId1 = NULL; + bytea *rowId2 = NULL; /* * Check state. We use a normal test not Assert because it is possible to @@ -6144,6 +6224,12 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, * if so. This preserves the behavior that statement-level triggers fire * just once per statement and fire after row-level triggers. */ + + /* Determine flags */ + if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger)) + new_event.ate_flags = (row_trigger && event == TRIGGER_EVENT_UPDATE) ? 
+ AFTER_TRIGGER_2CTID : AFTER_TRIGGER_1CTID; + switch (event) { case TRIGGER_EVENT_INSERT: @@ -6154,6 +6240,14 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, Assert(newslot != NULL); ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid1)); ItemPointerSetInvalid(&(new_event.ate_ctid2)); + if (table_get_row_ref_type(rel) == ROW_REF_ROWID) + { + bool isnull; + + rowId1 = DatumGetByteaP(slot_getsysattr(newslot, RowIdAttributeNumber, &isnull)); + new_event.ate_flags |= AFTER_TRIGGER_ROWID1; + Assert(!isnull); + } } else { @@ -6173,6 +6267,14 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, Assert(newslot == NULL); ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1)); ItemPointerSetInvalid(&(new_event.ate_ctid2)); + if (table_get_row_ref_type(rel) == ROW_REF_ROWID) + { + bool isnull; + + rowId1 = DatumGetByteaP(slot_getsysattr(oldslot, RowIdAttributeNumber, &isnull)); + new_event.ate_flags |= AFTER_TRIGGER_ROWID1; + Assert(!isnull); + } } else { @@ -6188,10 +6290,57 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, tgtype_event = TRIGGER_TYPE_UPDATE; if (row_trigger) { + bool src_rowid = false, + dst_rowid = false; + Assert(oldslot != NULL); Assert(newslot != NULL); ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1)); ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2)); + if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + { + Relation src_rel = src_partinfo->ri_RelationDesc; + Relation dst_rel = dst_partinfo->ri_RelationDesc; + + src_rowid = table_get_row_ref_type(src_rel) == + ROW_REF_ROWID; + dst_rowid = table_get_row_ref_type(dst_rel) == + ROW_REF_ROWID; + } + else + { + if (table_get_row_ref_type(rel) == ROW_REF_ROWID) + { + src_rowid = true; + dst_rowid = true; + } + } + + if (src_rowid) + { + Datum val; + bool isnull; + + val = slot_getsysattr(oldslot, + RowIdAttributeNumber, + &isnull); + rowId1 = DatumGetByteaP(val); + Assert(!isnull); + new_event.ate_flags |= 
AFTER_TRIGGER_ROWID1; + } + + if (dst_rowid) + { + Datum val; + bool isnull; + + val = slot_getsysattr(newslot, + RowIdAttributeNumber, + &isnull); + rowId2 = DatumGetByteaP(val); + Assert(!isnull); + new_event.ate_flags |= AFTER_TRIGGER_ROWID2; + } /* * Also remember the OIDs of partitions to fetch these tuples @@ -6229,20 +6378,6 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, break; } - /* Determine flags */ - if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger)) - { - if (row_trigger && event == TRIGGER_EVENT_UPDATE) - { - if (relkind == RELKIND_PARTITIONED_TABLE) - new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE; - else - new_event.ate_flags = AFTER_TRIGGER_2CTID; - } - else - new_event.ate_flags = AFTER_TRIGGER_1CTID; - } - /* else, we'll initialize ate_flags for each trigger */ tgtype_level = (row_trigger ? TRIGGER_TYPE_ROW : TRIGGER_TYPE_STATEMENT); @@ -6387,6 +6522,20 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, continue; /* Uniqueness definitely not violated */ } + /* Determine flags */ + if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger)) + { + if (row_trigger && event == TRIGGER_EVENT_UPDATE) + { + if (relkind == RELKIND_PARTITIONED_TABLE) + new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE; + else + new_event.ate_flags = AFTER_TRIGGER_2CTID; + } + else + new_event.ate_flags = AFTER_TRIGGER_1CTID; + } + /* * Fill in event structure and add it to the current query's queue. 
* Note we set ats_table to NULL whenever this trigger doesn't use @@ -6408,7 +6557,7 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo, new_shared.ats_modifiedcols = afterTriggerCopyBitmap(modifiedCols); afterTriggerAddEvent(&afterTriggers.query_stack[afterTriggers.query_depth].events, - &new_event, &new_shared); + &new_event, &new_shared, rowId1, rowId2); } /* diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 24a3990a30a..c8ce4d45ff4 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -4888,7 +4888,9 @@ ExecEvalSysVar(ExprState *state, ExprEvalStep *op, ExprContext *econtext, op->resnull); *op->resvalue = d; /* this ought to be unreachable, but it's cheap enough to check */ - if (unlikely(*op->resnull)) + if (op->d.var.attnum != RowIdAttributeNumber && + op->d.var.attnum != SelfItemPointerAttributeNumber && + unlikely(*op->resnull)) elog(ERROR, "failed to fetch attribute from slot"); } diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 3b03f03a98d..514d9b28c48 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -867,13 +867,15 @@ InitPlan(QueryDesc *queryDesc, int eflags) Oid relid; Relation relation; ExecRowMark *erm; + RangeTblEntry *rangeEntry; /* ignore "parent" rowmarks; they are irrelevant at runtime */ if (rc->isParent) continue; /* get relation's OID (will produce InvalidOid if subquery) */ - relid = exec_rt_fetch(rc->rti, estate)->relid; + rangeEntry = exec_rt_fetch(rc->rti, estate); + relid = rangeEntry->relid; /* * Open relation, if we need to access it for this reference type. 
@@ -903,7 +905,7 @@ InitPlan(QueryDesc *queryDesc, int eflags) erm->prti = rc->prti; erm->rowmarkId = rc->rowmarkId; erm->markType = rc->markType; - erm->refType = rc->refType; + erm->refType = rangeEntry->reftype; erm->strength = rc->strength; erm->waitPolicy = rc->waitPolicy; erm->ermActive = false; @@ -1267,6 +1269,7 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo, resultRelInfo->ri_ChildToRootMap = NULL; resultRelInfo->ri_ChildToRootMapValid = false; resultRelInfo->ri_CopyMultiInsertBuffer = NULL; + resultRelInfo->ri_RowRefType = table_get_row_ref_type(resultRelationDesc); } /* @@ -2708,7 +2711,7 @@ EvalPlanQualFetchRowMark(EPQState *epqstate, Index rti, TupleTableSlot *slot) { /* ordinary table, fetch the tuple */ if (!table_tuple_fetch_row_version(erm->relation, - (ItemPointer) DatumGetPointer(datum), + datum, SnapshotAny, slot)) elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck"); return true; diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index db685473fc0..aad266a19ff 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -250,7 +250,8 @@ retry: PushActiveSnapshot(GetLatestSnapshot()); - res = table_tuple_lock(rel, &(outslot->tts_tid), GetLatestSnapshot(), + res = table_tuple_lock(rel, PointerGetDatum(&(outslot->tts_tid)), + GetLatestSnapshot(), outslot, GetCurrentCommandId(false), lockmode, @@ -434,7 +435,8 @@ retry: PushActiveSnapshot(GetLatestSnapshot()); - res = table_tuple_lock(rel, &(outslot->tts_tid), GetLatestSnapshot(), + res = table_tuple_lock(rel, PointerGetDatum(&(outslot->tts_tid)), + GetLatestSnapshot(), outslot, GetCurrentCommandId(false), lockmode, @@ -571,7 +573,8 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo, resultRelInfo->ri_TrigDesc->trig_update_before_row) { if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo, - tid, NULL, slot, NULL, NULL)) + PointerGetDatum(tid), NULL, slot, + NULL, NULL)) skip_tuple = true; /* "do 
nothing" */ } @@ -638,7 +641,8 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo, resultRelInfo->ri_TrigDesc->trig_delete_before_row) { skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo, - tid, NULL, NULL, NULL, NULL); + PointerGetDatum(tid), NULL, NULL, + NULL, NULL); } if (!skip_tuple) diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c index 41754ddfea9..2d3ad904a64 100644 --- a/src/backend/executor/nodeLockRows.c +++ b/src/backend/executor/nodeLockRows.c @@ -27,6 +27,7 @@ #include "executor/nodeLockRows.h" #include "foreign/fdwapi.h" #include "miscadmin.h" +#include "utils/datum.h" #include "utils/rel.h" @@ -157,7 +158,16 @@ lnext: } /* okay, try to lock (and fetch) the tuple */ - tid = *((ItemPointer) DatumGetPointer(datum)); + if (erm->refType == ROW_REF_TID) + { + tid = *((ItemPointer) DatumGetPointer(datum)); + datum = PointerGetDatum(&tid); + } + else + { + Assert(erm->refType == ROW_REF_ROWID); + datum = datumCopy(datum, false, -1); + } switch (erm->markType) { case ROW_MARK_EXCLUSIVE: @@ -182,12 +192,15 @@ lnext: if (!IsolationUsesXactSnapshot()) lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION; - test = table_tuple_lock(erm->relation, &tid, estate->es_snapshot, + test = table_tuple_lock(erm->relation, datum, estate->es_snapshot, markSlot, estate->es_output_cid, lockmode, erm->waitPolicy, lockflags, &tmfd); + if (erm->refType == ROW_REF_ROWID) + pfree(DatumGetPointer(datum)); + switch (test) { case TM_WouldBlock: diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index a64e37e9af9..90eeb99b2cd 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -124,7 +124,7 @@ static void ExecPendingInserts(EState *estate); static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context, ResultRelInfo *sourcePartInfo, ResultRelInfo *destPartInfo, - ItemPointer tupleid, + Datum tupleid, TupleTableSlot *oldslot, 
TupleTableSlot *newslot); static bool ExecOnConflictUpdate(ModifyTableContext *context, @@ -141,13 +141,13 @@ static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate, static TupleTableSlot *ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple oldtuple, bool canSetTag); static void ExecInitMerge(ModifyTableState *mtstate, EState *estate); static TupleTableSlot *ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple oldtuple, bool canSetTag, bool *matched); @@ -1221,7 +1221,7 @@ ExecPendingInserts(EState *estate) */ static bool ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot **epqreturnslot, TM_Result *result) { if (result) @@ -1252,7 +1252,7 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, */ static TM_Result ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, bool changingPart, int options, + Datum tupleid, bool changingPart, int options, TupleTableSlot *oldSlot) { EState *estate = context->estate; @@ -1280,7 +1280,7 @@ ExecDeleteAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, */ static void ExecDeleteEpilogue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, + HeapTuple oldtuple, TupleTableSlot *slot, bool changingPart) { ModifyTableState *mtstate = context->mtstate; @@ -1361,7 +1361,7 @@ ExecInitDeleteTupleSlot(ModifyTableState *mtstate, static TupleTableSlot * ExecDelete(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *oldslot, bool processReturning, @@ -1558,7 +1558,7 @@ ldelete: if (tupleDeleted) *tupleDeleted = true; - ExecDeleteEpilogue(context, resultRelInfo, 
tupleid, oldtuple, + ExecDeleteEpilogue(context, resultRelInfo, oldtuple, oldslot, changingPart); /* Process RETURNING if present and if requested */ @@ -1575,7 +1575,7 @@ ldelete: /* FDW must have provided a slot containing the deleted row */ Assert(!TupIsNull(slot)); } - else + else if (!slot || TupIsNull(slot)) { /* Copy old tuple to the returning slot */ slot = ExecGetReturningSlot(estate, resultRelInfo); @@ -1624,7 +1624,7 @@ ldelete: static bool ExecCrossPartitionUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot, bool canSetTag, UpdateContext *updateCxt, @@ -1783,7 +1783,7 @@ ExecCrossPartitionUpdate(ModifyTableContext *context, */ static bool ExecUpdatePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot, TM_Result *result) { Relation resultRelationDesc = resultRelInfo->ri_RelationDesc; @@ -1860,7 +1860,7 @@ ExecUpdatePrepareSlot(ResultRelInfo *resultRelInfo, */ static TM_Result ExecUpdateAct(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot, bool canSetTag, int options, TupleTableSlot *oldSlot, UpdateContext *updateCxt) { @@ -2014,7 +2014,7 @@ lreplace: */ static void ExecUpdateEpilogue(ModifyTableContext *context, UpdateContext *updateCxt, - ResultRelInfo *resultRelInfo, ItemPointer tupleid, + ResultRelInfo *resultRelInfo, HeapTuple oldtuple, TupleTableSlot *slot, TupleTableSlot *oldslot) { @@ -2064,7 +2064,7 @@ static void ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context, ResultRelInfo *sourcePartInfo, ResultRelInfo *destPartInfo, - ItemPointer tupleid, + Datum tupleid, TupleTableSlot *oldslot, TupleTableSlot *newslot) { @@ -2154,7 +2154,7 @@ 
ExecCrossPartitionUpdateForeignKey(ModifyTableContext *context, */ static TupleTableSlot * ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, TupleTableSlot *slot, + Datum tupleid, HeapTuple oldtuple, TupleTableSlot *slot, TupleTableSlot *oldslot, bool canSetTag, bool locked) { EState *estate = context->estate; @@ -2208,15 +2208,19 @@ ExecUpdate(ModifyTableContext *context, ResultRelInfo *resultRelInfo, } else { - int options = TABLE_MODIFY_WAIT | TABLE_MODIFY_FETCH_OLD_TUPLE; + int options = TABLE_MODIFY_WAIT; /* * Specify that we need to lock and fetch the last tuple version for * EPQ on appropriate transaction isolation levels if the tuple isn't * locked already. */ - if (!locked && !IsolationUsesXactSnapshot()) - options |= TABLE_MODIFY_LOCK_UPDATED; + if (!locked) + { + options |= TABLE_MODIFY_FETCH_OLD_TUPLE; + if (!IsolationUsesXactSnapshot()) + options |= TABLE_MODIFY_LOCK_UPDATED; + } /* * If we generate a new candidate tuple after EvalPlanQual testing, we @@ -2326,7 +2330,7 @@ redo_act: if (canSetTag) (estate->es_processed)++; - ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, tupleid, oldtuple, + ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, oldtuple, slot, oldslot); /* Process RETURNING if present */ @@ -2358,7 +2362,19 @@ ExecOnConflictUpdate(ModifyTableContext *context, ExprContext *econtext = mtstate->ps.ps_ExprContext; ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause; TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing; - ItemPointer conflictTid = &existing->tts_tid; + Datum tupleid; + + if (table_get_row_ref_type(resultRelInfo->ri_RelationDesc) == ROW_REF_ROWID) + { + bool isnull; + + tupleid = slot_getsysattr(existing, RowIdAttributeNumber, &isnull); + Assert(!isnull); + } + else + { + tupleid = PointerGetDatum(&existing->tts_tid); + } /* * Make tuple and any needed join variables available to ExecQual and @@ -2414,7 +2430,7 @@ 
ExecOnConflictUpdate(ModifyTableContext *context, /* Execute UPDATE with projection */ *returning = ExecUpdate(context, resultRelInfo, - conflictTid, NULL, + tupleid, NULL, resultRelInfo->ri_onConflict->oc_ProjSlot, existing, canSetTag, true); @@ -2433,7 +2449,7 @@ ExecOnConflictUpdate(ModifyTableContext *context, */ static TupleTableSlot * ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, bool canSetTag) + Datum tupleid, HeapTuple oldtuple, bool canSetTag) { TupleTableSlot *rslot = NULL; bool matched; @@ -2482,7 +2498,7 @@ ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo, * from ExecMergeNotMatched to ExecMergeMatched, there is no risk of a * livelock. */ - matched = tupleid != NULL || oldtuple != NULL; + matched = DatumGetPointer(tupleid) != NULL || oldtuple != NULL; if (matched) rslot = ExecMergeMatched(context, resultRelInfo, tupleid, oldtuple, canSetTag, &matched); @@ -2523,7 +2539,7 @@ ExecMerge(ModifyTableContext *context, ResultRelInfo *resultRelInfo, */ static TupleTableSlot * ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, - ItemPointer tupleid, HeapTuple oldtuple, bool canSetTag, + Datum tupleid, HeapTuple oldtuple, bool canSetTag, bool *matched) { ModifyTableState *mtstate = context->mtstate; @@ -2559,7 +2575,7 @@ ExecMergeMatched(ModifyTableContext *context, ResultRelInfo *resultRelInfo, * the tupleid of the target row, or an old tuple from the target wholerow * junk attr. */ - Assert(tupleid != NULL || oldtuple != NULL); + Assert(DatumGetPointer(tupleid) != NULL || oldtuple != NULL); if (oldtuple != NULL) ExecForceStoreHeapTuple(oldtuple, resultRelInfo->ri_oldTupleSlot, false); @@ -2573,7 +2589,7 @@ lmerge_matched: * EvalPlanQual returns us a new tuple, which may not be visible to our * MVCC snapshot. 
*/ - if (tupleid != NULL) + if (DatumGetPointer(tupleid) != NULL) { if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc, tupleid, @@ -2682,7 +2698,7 @@ lmerge_matched: if (result == TM_Ok) { ExecUpdateEpilogue(context, &updateCxt, resultRelInfo, - tupleid, NULL, newslot, + NULL, newslot, resultRelInfo->ri_oldTupleSlot); mtstate->mt_merge_updated += 1; } @@ -2718,7 +2734,7 @@ lmerge_matched: if (result == TM_Ok) { - ExecDeleteEpilogue(context, resultRelInfo, tupleid, NULL, + ExecDeleteEpilogue(context, resultRelInfo, NULL, resultRelInfo->ri_oldTupleSlot, false); mtstate->mt_merge_deleted += 1; } @@ -2842,9 +2858,13 @@ lmerge_matched: return NULL; } - (void) ExecGetJunkAttribute(epqslot, - resultRelInfo->ri_RowIdAttNo, - &isNull); + /* + * Update tupleid to that of the new tuple, for + * the refetch we do at the top. + */ + tupleid = ExecGetJunkAttribute(epqslot, + resultRelInfo->ri_RowIdAttNo, + &isNull); if (isNull) { *matched = false; @@ -2871,11 +2891,7 @@ lmerge_matched: * apply all the MATCHED rules again, to ensure * that the first qualifying WHEN MATCHED action * is executed. - * - * Update tupleid to that of the new tuple, for - * the refetch we do at the top. */ - ItemPointerCopy(&context->tmfd.ctid, tupleid); goto lmerge_matched; case TM_Deleted: @@ -3413,10 +3429,10 @@ ExecModifyTable(PlanState *pstate) PlanState *subplanstate; TupleTableSlot *slot; TupleTableSlot *oldSlot; + Datum tupleid; ItemPointerData tuple_ctid; HeapTupleData oldtupdata; HeapTuple oldtuple; - ItemPointer tupleid; CHECK_FOR_INTERRUPTS(); @@ -3465,6 +3481,8 @@ ExecModifyTable(PlanState *pstate) */ for (;;) { + RowRefType refType; + /* * Reset the per-output-tuple exprcontext. This is needed because * triggers expect to use that context as workspace. 
It's a bit ugly @@ -3515,7 +3533,7 @@ ExecModifyTable(PlanState *pstate) EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot); slot = ExecMerge(&context, node->resultRelInfo, - NULL, NULL, node->canSetTag); + PointerGetDatum(NULL), NULL, node->canSetTag); /* * If we got a RETURNING result, return it to the caller. @@ -3559,7 +3577,8 @@ ExecModifyTable(PlanState *pstate) EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot); slot = context.planSlot; - tupleid = NULL; + refType = resultRelInfo->ri_RowRefType; + tupleid = PointerGetDatum(NULL); oldtuple = NULL; /* @@ -3602,7 +3621,7 @@ ExecModifyTable(PlanState *pstate) EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot); slot = ExecMerge(&context, node->resultRelInfo, - NULL, NULL, node->canSetTag); + PointerGetDatum(NULL), NULL, node->canSetTag); /* * If we got a RETURNING result, return it to the @@ -3617,9 +3636,25 @@ ExecModifyTable(PlanState *pstate) elog(ERROR, "ctid is NULL"); } - tupleid = (ItemPointer) DatumGetPointer(datum); - tuple_ctid = *tupleid; /* be sure we don't free ctid!! */ - tupleid = &tuple_ctid; + if (refType == ROW_REF_TID) + { + /* shouldn't ever get a null result... */ + if (isNull) + elog(ERROR, "ctid is NULL"); + + tuple_ctid = *((ItemPointer) DatumGetPointer(datum)); /* be sure we don't free + * ctid!! */ + tupleid = PointerGetDatum(&tuple_ctid); + } + else + { + Assert(refType == ROW_REF_ROWID); + /* shouldn't ever get a null result... */ + if (isNull) + elog(ERROR, "rowid is NULL"); + + tupleid = datumCopy(datum, false, -1); + } } /* @@ -3659,7 +3694,7 @@ ExecModifyTable(PlanState *pstate) EvalPlanQualSetSlot(&node->mt_epqstate, context.planSlot); slot = ExecMerge(&context, node->resultRelInfo, - NULL, NULL, node->canSetTag); + PointerGetDatum(NULL), NULL, node->canSetTag); /* * If we got a RETURNING result, return it to the @@ -3723,6 +3758,7 @@ ExecModifyTable(PlanState *pstate) /* Fetch the most recent version of old tuple. 
*/ Relation relation = resultRelInfo->ri_RelationDesc; + Assert(DatumGetPointer(tupleid) != NULL); if (!table_tuple_fetch_row_version(relation, tupleid, SnapshotAny, oldSlot)) @@ -3757,6 +3793,9 @@ ExecModifyTable(PlanState *pstate) break; } + if (refType == ROW_REF_ROWID && DatumGetPointer(tupleid) != NULL) + pfree(DatumGetPointer(tupleid)); + /* * If we got a RETURNING result, return it to caller. We'll continue * the work on next call. @@ -4000,10 +4039,20 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) relkind == RELKIND_MATVIEW || relkind == RELKIND_PARTITIONED_TABLE) { - resultRelInfo->ri_RowIdAttNo = - ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid"); - if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo)) - elog(ERROR, "could not find junk ctid column"); + if (resultRelInfo->ri_RowRefType == ROW_REF_TID) + { + resultRelInfo->ri_RowIdAttNo = + ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid"); + if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo)) + elog(ERROR, "could not find junk ctid column"); + } + else + { + resultRelInfo->ri_RowIdAttNo = + ExecFindJunkAttributeInTlist(subplan->targetlist, "rowid"); + if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo)) + elog(ERROR, "could not find junk rowid column"); + } } else if (relkind == RELKIND_FOREIGN_TABLE) { @@ -4313,6 +4362,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) estate->es_auxmodifytables = lcons(mtstate, estate->es_auxmodifytables); + + return mtstate; } diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c index 864a9013b62..f4a124ac4eb 100644 --- a/src/backend/executor/nodeTidscan.c +++ b/src/backend/executor/nodeTidscan.c @@ -377,7 +377,7 @@ TidNext(TidScanState *node) if (node->tss_isCurrentOf) table_tuple_get_latest_tid(scan, &tid); - if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot)) + if (table_tuple_fetch_row_version(heapRelation, PointerGetDatum(&tid), 
snapshot, slot)) return slot; /* Bad TID or failed snapshot qual; try next */ diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 4b9c9deee84..ee648bedd4a 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2376,19 +2376,24 @@ select_rowmark_type(RangeTblEntry *rte, LockClauseStrength strength, { /* Let the FDW select the rowmark type, if it wants to */ FdwRoutine *fdwroutine = GetFdwRoutineByRelId(rte->relid); + RowMarkType result = ROW_MARK_REFERENCE; /* Set row reference type as ROW_REF_COPY by default */ *refType = ROW_REF_COPY; if (fdwroutine->GetForeignRowMarkType != NULL) - return fdwroutine->GetForeignRowMarkType(rte, strength, refType); + result = fdwroutine->GetForeignRowMarkType(rte, strength, refType); + + /* XXX: should we fill this before? */ + rte->reftype = *refType; + /* Otherwise, use ROW_MARK_REFERENCE by default */ - return ROW_MARK_REFERENCE; + return result; } else { /* Regular table, apply the appropriate lock type */ - *refType = ROW_REF_TID; + *refType = rte->reftype; switch (strength) { case LCS_NONE: diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c index 4599b0dc761..3620be5b52c 100644 --- a/src/backend/optimizer/prep/preptlist.c +++ b/src/backend/optimizer/prep/preptlist.c @@ -226,6 +226,22 @@ preprocess_targetlist(PlannerInfo *root) true); tlist = lappend(tlist, tle); } + if (rc->allRefTypes & (1 << ROW_REF_ROWID)) + { + /* Need to fetch RowID */ + var = makeVar(rc->rti, + RowIdAttributeNumber, + BYTEAOID, + -1, + InvalidOid, + 0); + snprintf(resname, sizeof(resname), "rowid%u", rc->rowmarkId); + tle = makeTargetEntry((Expr *) var, + list_length(tlist) + 1, + pstrdup(resname), + true); + tlist = lappend(tlist, tle); + } if (rc->allRefTypes & (1 << ROW_REF_COPY)) { /* Need the whole row as a junk var */ diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c index
6ba4eba224a..83c08bbd0e1 100644 --- a/src/backend/optimizer/util/appendinfo.c +++ b/src/backend/optimizer/util/appendinfo.c @@ -16,6 +16,7 @@ #include "access/htup_details.h" #include "access/table.h" +#include "access/tableam.h" #include "foreign/fdwapi.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" @@ -895,17 +896,35 @@ add_row_identity_columns(PlannerInfo *root, Index rtindex, relkind == RELKIND_MATVIEW || relkind == RELKIND_PARTITIONED_TABLE) { + RowRefType refType = ROW_REF_TID; + + refType = table_get_row_ref_type(target_relation); + /* * Emit CTID so that executor can find the row to merge, update or * delete. */ - var = makeVar(rtindex, - SelfItemPointerAttributeNumber, - TIDOID, - -1, - InvalidOid, - 0); - add_row_identity_var(root, var, rtindex, "ctid"); + if (refType == ROW_REF_TID) + { + var = makeVar(rtindex, + SelfItemPointerAttributeNumber, + TIDOID, + -1, + InvalidOid, + 0); + add_row_identity_var(root, var, rtindex, "ctid"); + } + else + { + Assert(refType == ROW_REF_ROWID); + var = makeVar(rtindex, + RowIdAttributeNumber, + BYTEAOID, + -1, + InvalidOid, + 0); + add_row_identity_var(root, var, rtindex, "rowid"); + } } else if (relkind == RELKIND_FOREIGN_TABLE) { diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c index b4b076d1cb1..4a5a167d833 100644 --- a/src/backend/optimizer/util/inherit.c +++ b/src/backend/optimizer/util/inherit.c @@ -16,6 +16,7 @@ #include "access/sysattr.h" #include "access/table.h" +#include "access/tableam.h" #include "catalog/partition.h" #include "catalog/pg_inherits.h" #include "catalog/pg_type.h" @@ -282,6 +283,24 @@ expand_inherited_rtentry(PlannerInfo *root, RelOptInfo *rel, newvars = lappend(newvars, var); } + if ((new_allRefTypes & (1 << ROW_REF_ROWID)) && + !(old_allRefTypes & (1 << ROW_REF_ROWID))) + { + var = makeVar(oldrc->rti, + RowIdAttributeNumber, + BYTEAOID, + -1, + InvalidOid, + 0); + snprintf(resname, sizeof(resname), "rowid%u", oldrc->rowmarkId); + tle = 
makeTargetEntry((Expr *) var, + list_length(root->processed_tlist) + 1, + pstrdup(resname), + true); + root->processed_tlist = lappend(root->processed_tlist, tle); + newvars = lappend(newvars, var); + } + /* Add tableoid junk Var, unless we had it already */ if (!old_isParent) { @@ -485,6 +504,7 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, Assert(parentrte->rtekind == RTE_RELATION); /* else this is dubious */ childrte->relid = childOID; childrte->relkind = childrel->rd_rel->relkind; + childrte->reftype = table_get_row_ref_type(childrel); /* A partitioned child will need to be expanded further. */ if (childrte->relkind == RELKIND_PARTITIONED_TABLE) { diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index 427b7325db8..2c80e010f2a 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -20,6 +20,7 @@ #include "access/relation.h" #include "access/sysattr.h" #include "access/table.h" +#include "access/tableam.h" #include "catalog/heap.h" #include "catalog/namespace.h" #include "catalog/pg_type.h" @@ -1503,6 +1504,7 @@ addRangeTableEntry(ParseState *pstate, rte->inh = inh; rte->relkind = rel->rd_rel->relkind; rte->rellockmode = lockmode; + rte->reftype = table_get_row_ref_type(rel); /* * Build the list of effective column names using user-supplied aliases @@ -1588,6 +1590,7 @@ addRangeTableEntryForRelation(ParseState *pstate, rte->inh = inh; rte->relkind = rel->rd_rel->relkind; rte->rellockmode = lockmode; + rte->reftype = table_get_row_ref_type(rel); /* * Build the list of effective column names using user-supplied aliases @@ -1656,6 +1659,7 @@ addRangeTableEntryForSubquery(ParseState *pstate, rte->rtekind = RTE_SUBQUERY; rte->subquery = subquery; rte->alias = alias; + rte->reftype = ROW_REF_COPY; eref = alias ? 
copyObject(alias) : makeAlias("unnamed_subquery", NIL); numaliases = list_length(eref->colnames); @@ -1763,6 +1767,7 @@ addRangeTableEntryForFunction(ParseState *pstate, rte->functions = NIL; /* we'll fill this list below */ rte->funcordinality = rangefunc->ordinality; rte->alias = alias; + rte->reftype = ROW_REF_COPY; /* * Choose the RTE alias name. We default to using the first function's @@ -2081,6 +2086,7 @@ addRangeTableEntryForTableFunc(ParseState *pstate, rte->coltypmods = tf->coltypmods; rte->colcollations = tf->colcollations; rte->alias = alias; + rte->reftype = ROW_REF_COPY; eref = alias ? copyObject(alias) : makeAlias(refname, NIL); numaliases = list_length(eref->colnames); @@ -2156,6 +2162,7 @@ addRangeTableEntryForValues(ParseState *pstate, rte->coltypmods = coltypmods; rte->colcollations = colcollations; rte->alias = alias; + rte->reftype = ROW_REF_COPY; eref = alias ? copyObject(alias) : makeAlias(refname, NIL); @@ -2252,6 +2259,7 @@ addRangeTableEntryForJoin(ParseState *pstate, rte->joinrightcols = rightcols; rte->join_using_alias = join_using_alias; rte->alias = alias; + rte->reftype = ROW_REF_COPY; eref = alias ? copyObject(alias) : makeAlias("unnamed_join", NIL); numaliases = list_length(eref->colnames); @@ -2332,6 +2340,7 @@ addRangeTableEntryForCTE(ParseState *pstate, rte->rtekind = RTE_CTE; rte->ctename = cte->ctename; rte->ctelevelsup = levelsup; + rte->reftype = ROW_REF_COPY; /* Self-reference if and only if CTE's parse analysis isn't completed */ rte->self_reference = !IsA(cte->ctequery, Query); @@ -2494,6 +2503,7 @@ addRangeTableEntryForENR(ParseState *pstate, * if they access transition tables linked to a table that is altered. 
*/ rte->relid = enrmd->reliddesc; + rte->reftype = ROW_REF_COPY; /* * Build the list of effective column names using user-supplied aliases @@ -3257,6 +3267,9 @@ get_rte_attribute_name(RangeTblEntry *rte, AttrNumber attnum) attnum > 0 && attnum <= list_length(rte->alias->colnames)) return strVal(list_nth(rte->alias->colnames, attnum - 1)); + if (attnum == RowIdAttributeNumber) + return "rowid"; + /* * If the RTE is a relation, go to the system catalogs not the * eref->colnames list. This is a little slower but it will give the diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 9fd05b15e73..7a0fdbe3f40 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -1854,6 +1854,7 @@ ApplyRetrieveRule(Query *parsetree, rte = rt_fetch(rt_index, parsetree->rtable); rte->rtekind = RTE_SUBQUERY; + rte->reftype = ROW_REF_COPY; rte->subquery = rule_action; rte->security_barrier = RelationIsSecurityView(relation); diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c index 947a868e569..d3a41533552 100644 --- a/src/backend/utils/sort/tuplestore.c +++ b/src/backend/utils/sort/tuplestore.c @@ -1100,6 +1100,36 @@ tuplestore_gettupleslot(Tuplestorestate *state, bool forward, } } +/* + * Same as tuplestore_gettupleslot(), but forces tuple storage to slot. Thus, + * it can work with slot types different than minimal tuple.
+ */ +bool +tuplestore_force_gettupleslot(Tuplestorestate *state, bool forward, + bool copy, TupleTableSlot *slot) +{ + MinimalTuple tuple; + bool should_free; + + tuple = (MinimalTuple) tuplestore_gettuple(state, forward, &should_free); + + if (tuple) + { + if (copy && !should_free) + { + tuple = heap_copy_minimal_tuple(tuple); + should_free = true; + } + ExecForceStoreMinimalTuple(tuple, slot, should_free); + return true; + } + else + { + ExecClearTuple(slot); + return false; + } +} + /* * tuplestore_advance - exported function to adjust position without fetching * diff --git a/src/include/access/sysattr.h b/src/include/access/sysattr.h index e88dec71ee9..867b5eb489e 100644 --- a/src/include/access/sysattr.h +++ b/src/include/access/sysattr.h @@ -24,6 +24,7 @@ #define MaxTransactionIdAttributeNumber (-4) #define MaxCommandIdAttributeNumber (-5) #define TableOidAttributeNumber (-6) -#define FirstLowInvalidHeapAttributeNumber (-7) +#define RowIdAttributeNumber (-7) +#define FirstLowInvalidHeapAttributeNumber (-8) #endif /* SYSATTR_H */ diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index dedaf1f758e..5be4c53af5e 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -476,7 +476,7 @@ typedef struct TableAmRoutine * test, returns true, false otherwise. 
*/ bool (*tuple_fetch_row_version) (Relation rel, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot); @@ -535,7 +535,7 @@ typedef struct TableAmRoutine /* see table_tuple_delete() for reference about parameters */ TM_Result (*tuple_delete) (Relation rel, - ItemPointer tid, + Datum tupleid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, @@ -546,7 +546,7 @@ typedef struct TableAmRoutine /* see table_tuple_update() for reference about parameters */ TM_Result (*tuple_update) (Relation rel, - ItemPointer otid, + Datum tupleid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, @@ -559,7 +559,7 @@ typedef struct TableAmRoutine /* see table_tuple_lock() for reference about parameters */ TM_Result (*tuple_lock) (Relation rel, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, @@ -702,6 +702,11 @@ typedef struct TableAmRoutine * ------------------------------------------------------------------------ */ + /* + * Get the type of row identifier in the table. + */ + RowRefType (*get_row_ref_type) (Relation rel); + /* * This callback frees relation private cache data stored in rd_amcache. * After the call all memory related to rd_amcache must be freed, @@ -1284,9 +1289,9 @@ extern bool table_index_fetch_tuple_check(Relation rel, /* - * Fetch tuple at `tid` into `slot`, after doing a visibility test according to - * `snapshot`. If a tuple was found and passed the visibility test, returns - * true, false otherwise. + * Fetch tuple identified by `tupleid` into `slot`, after doing a visibility + * test according to `snapshot`. If a tuple was found and passed the visibility + * test, returns true, false otherwise. * * See table_index_fetch_tuple's comment about what the difference between * these functions is. 
It is correct to use this function outside of index @@ -1294,7 +1299,7 @@ extern bool table_index_fetch_tuple_check(Relation rel, */ static inline bool table_tuple_fetch_row_version(Relation rel, - ItemPointer tid, + Datum tupleid, Snapshot snapshot, TupleTableSlot *slot) { @@ -1306,7 +1311,8 @@ table_tuple_fetch_row_version(Relation rel, if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan)) elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding"); - return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot); + return rel->rd_tableam->tuple_fetch_row_version(rel, tupleid, + snapshot, slot); } /* @@ -1492,7 +1498,7 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots, * * Input parameters: * relation - table to be modified (caller must hold suitable lock) - * tid - TID of tuple to be deleted + * tupleid - identifier of tuple to be deleted * cid - delete command ID (used for visibility test, and stored into * cmax if successful) * crosscheck - if not InvalidSnapshot, also check tuple against this @@ -1521,12 +1527,12 @@ table_multi_insert(Relation rel, TupleTableSlot **slots, int nslots, * TM_FailureData for additional info. 
*/ static inline TM_Result -table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, +table_tuple_delete(Relation rel, Datum tupleid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, int options, TM_FailureData *tmfd, bool changingPart, TupleTableSlot *oldSlot) { - return rel->rd_tableam->tuple_delete(rel, tid, cid, + return rel->rd_tableam->tuple_delete(rel, tupleid, cid, snapshot, crosscheck, options, tmfd, changingPart, oldSlot); @@ -1540,7 +1546,7 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, * * Input parameters: * relation - table to be modified (caller must hold suitable lock) - * otid - TID of old tuple to be replaced + * tupleid - identifier of old tuple to be replaced * slot - newly constructed tuple data to store * cid - update command ID (used for visibility test, and stored into * cmax/cmin if successful) @@ -1577,13 +1583,13 @@ table_tuple_delete(Relation rel, ItemPointer tid, CommandId cid, * for additional info. */ static inline TM_Result -table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, +table_tuple_update(Relation rel, Datum tupleid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, int options, TM_FailureData *tmfd, LockTupleMode *lockmode, TU_UpdateIndexes *update_indexes, TupleTableSlot *oldSlot) { - return rel->rd_tableam->tuple_update(rel, otid, slot, + return rel->rd_tableam->tuple_update(rel, tupleid, slot, cid, snapshot, crosscheck, options, tmfd, lockmode, update_indexes, @@ -1595,7 +1601,7 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, * * Input parameters: * relation: relation containing tuple (caller must hold suitable lock) - * tid: TID of tuple to lock + * tupleid: identifier of tuple to lock * snapshot: snapshot to use for visibility determinations * cid: current command ID (used for visibility test, and stored into * tuple's cmax if lock is successful) @@ -1624,12 +1630,12 @@ table_tuple_update(Relation rel, 
ItemPointer otid, TupleTableSlot *slot, * comments for struct TM_FailureData for additional info. */ static inline TM_Result -table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot, +table_tuple_lock(Relation rel, Datum tupleid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, TM_FailureData *tmfd) { - return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot, + return rel->rd_tableam->tuple_lock(rel, tupleid, snapshot, slot, cid, mode, wait_policy, flags, tmfd); } @@ -1915,6 +1921,22 @@ table_define_index(Relation rel, Oid indoid, bool reindex, * ---------------------------------------------------------------------------- */ +/* + * Get the type of row identifier. When table AM routine is not accessible, + * returns ROW_REF_COPY for foreign tables and ROW_REF_TID otherwise. The + * latter happens during catalog initialization; catalog tables use heap. + */ +static inline RowRefType +table_get_row_ref_type(Relation rel) +{ + if (rel->rd_tableam) + return rel->rd_tableam->get_row_ref_type(rel); + else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + return ROW_REF_COPY; + else + return ROW_REF_TID; +} + /* * Frees relation private cache data stored in rd_amcache. Uses * free_rd_amcache method if provided. 
Assumes rd_amcache to point to single diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h index cb968d03ecd..c16e6b6e5a0 100644 --- a/src/include/commands/trigger.h +++ b/src/include/commands/trigger.h @@ -209,7 +209,7 @@ extern void ExecASDeleteTriggers(EState *estate, extern bool ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple fdw_trigtuple, TupleTableSlot **epqslot, TM_Result *tmresult, @@ -231,7 +231,7 @@ extern void ExecASUpdateTriggers(EState *estate, extern bool ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, ResultRelInfo *relinfo, - ItemPointer tupleid, + Datum tupleid, HeapTuple fdw_trigtuple, TupleTableSlot *newslot, TM_Result *tmresult, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index b89baef95d3..04d8cef6c68 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1089,6 +1089,8 @@ typedef struct RangeTblEntry Index perminfoindex pg_node_attr(query_jumble_ignore); /* sampling info, or NULL */ struct TableSampleClause *tablesample; + /* row identifier for relation */ + RowRefType reftype; /* * Fields valid for a subquery RTE (else NULL): diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index d7f9c389dac..d850411aa95 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -1323,27 +1323,6 @@ typedef enum RowMarkType ROW_MARK_REFERENCE, /* just fetch the TID, don't lock it */ } RowMarkType; -/* - * RowRefType - - * enums for types of row identifiers - * - * For plain tables we can just fetch the TID, much as for a target relation; - * this case is represented by ROW_REF_TID. Otherwise (for example for VALUES - * or FUNCTION scans) we have to copy the whole row value. 
ROW_REF_COPY is - * pretty inefficient, since most of the time we'll never need the data; but - * fortunately the overhead is usually not performance-critical in practice. - * By default we use ROW_REF_COPY for foreign tables, but if the FDW has - * a concept of rowid it can request to use ROW_REF_TID instead. - * (Again, this probably doesn't make sense if a physical remote fetch is - * needed, but for FDWs that map to local storage it might be credible.) - * In future we may allow more types of row identifiers. - */ -typedef enum RowRefType -{ - ROW_REF_TID, /* Item pointer (block, offset) */ - ROW_REF_COPY /* Full row copy */ -} RowRefType; - #define RowMarkRequiresRowShareLock(marktype) ((marktype) <= ROW_MARK_KEYSHARE) /* diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 376f67e6a5f..84cf7837de1 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -2211,4 +2211,26 @@ typedef struct OnConflictExpr List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */ } OnConflictExpr; +/* + * RowRefType - + * enums for types of row identifiers + * + * For plain tables we can just fetch the TID, much as for a target relation; + * this case is represented by ROW_REF_TID. Otherwise (for example for VALUES + * or FUNCTION scans) we have to copy the whole row value. ROW_REF_COPY is + * pretty inefficient, since most of the time we'll never need the data; but + * fortunately the overhead is usually not performance-critical in practice. + * By default we use ROW_REF_COPY for foreign tables, but if the FDW has + * a concept of rowid it can request to use ROW_REF_TID instead. + * (Again, this probably doesn't make sense if a physical remote fetch is + * needed, but for FDWs that map to local storage it might be credible.) + * In future we may allow more types of row identifiers. 
+ */ +typedef enum RowRefType +{ + ROW_REF_TID, /* Item pointer (block, offset) */ + ROW_REF_ROWID, /* Bytea row id */ + ROW_REF_COPY /* Full row copy */ +} RowRefType; + #endif /* PRIMNODES_H */ diff --git a/src/include/utils/tuplestore.h b/src/include/utils/tuplestore.h index 419613c17ba..cf291a0d17a 100644 --- a/src/include/utils/tuplestore.h +++ b/src/include/utils/tuplestore.h @@ -70,6 +70,9 @@ extern bool tuplestore_in_memory(Tuplestorestate *state); extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward, bool copy, TupleTableSlot *slot); +extern bool tuplestore_force_gettupleslot(Tuplestorestate *state, bool forward, + bool copy, TupleTableSlot *slot); + extern bool tuplestore_advance(Tuplestorestate *state, bool forward); extern bool tuplestore_skiptuples(Tuplestorestate *state, -- 2.39.3 (Apple Git-145)