diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c new file mode 100644 index 4c31f19..4799dc2 *** a/src/backend/commands/trigger.c --- b/src/backend/commands/trigger.c *************** typedef SetConstraintStateData *SetConst *** 2851,2864 **** /* * Per-trigger-event data * ! * The actual per-event data, AfterTriggerEventData, includes DONE/IN_PROGRESS ! * status bits and one or two tuple CTIDs. Each event record also has an ! * associated AfterTriggerSharedData that is shared across all instances ! * of similar events within a "chunk". ! * ! * We arrange not to waste storage on ate_ctid2 for non-update events. ! * We could go further and not store either ctid for statement-level triggers, ! * but that seems unlikely to be worth the trouble. * * Note: ats_firing_id is initially zero and is set to something else when * AFTER_TRIGGER_IN_PROGRESS is set. It indicates which trigger firing --- 2851,2864 ---- /* * Per-trigger-event data * ! * The actual per-event data includes DONE/IN_PROGRESS status bits, an ! * encoded CTID and a reference to an associated AfterTriggerSharedData that ! * is shared across all instances of similar events within a "chunk". The ! * encoding uses between 1 and 9 bytes per event, and is optimised so that ! * the smallest amount of space is used for the types of event that are most ! * likely to occur often - events where the CTID is the same or close to the ! * last event's CTID. This happens when there are multiple triggers on a ! * table, or the table is traversed using a sequential or bitmap heap scan. * * Note: ats_firing_id is initially zero and is set to something else when * AFTER_TRIGGER_IN_PROGRESS is set. It indicates which trigger firing *************** typedef SetConstraintStateData *SetConst *** 2869,2881 **** * cycles. So we need only ensure that ats_firing_id is zero when attaching * a new event to an existing AfterTriggerSharedData record. */ - typedef uint32 TriggerFlags; ! #define AFTER_TRIGGER_OFFSET 0x0FFFFFFF /* must be low-order ! * bits */ ! #define AFTER_TRIGGER_2CTIDS 0x10000000 ! #define AFTER_TRIGGER_DONE 0x20000000 ! #define AFTER_TRIGGER_IN_PROGRESS 0x40000000 typedef struct AfterTriggerSharedData *AfterTriggerShared; --- 2869,2900 ---- * cycles. So we need only ensure that ats_firing_id is zero when attaching * a new event to an existing AfterTriggerSharedData record. */ ! /* ! * The first (and possibly the only) byte of an event record contains flags ! * that determine how the rest of the record is encoded. ! * ! * AFTER_TRIGGER_SHARED_DATA_FLAGS (3 bits) determines how the index to the ! * AfterTriggerSharedData is encoded: ! * 0-5 => Index of AfterTriggerSharedData (counting back from the end of ! * the chunk). If the CTID is the same as the previous event, ! * this index is treated as relative to the last event's index. ! * 6 => Index is encoded in 1 additional byte. ! * 7 => Index is encoded in 2 additional bytes. ! * ! * AFTER_TRIGGER_CTID_FLAGS (3 bits) determines how the CTID is encoded: ! * 0 => CTID is same as previous event. ! * 1-6 => Difference between this event's CTID and previous CTID encoded ! * using 1-6 bytes. ! * 7 => CTID is offset by 1 relative to previous event. ! * ! * The flag byte is then followed by 0-2 bytes for the index to the shared ! * data and 0-6 bytes for the CTID. ! */ ! #define AFTER_TRIGGER_DONE 0x80 ! #define AFTER_TRIGGER_IN_PROGRESS 0x40 ! #define AFTER_TRIGGER_SHARED_DATA_FLAGS 0x38 ! #define AFTER_TRIGGER_CTID_FLAGS 0x07 typedef struct AfterTriggerSharedData *AfterTriggerShared; *************** typedef struct AfterTriggerEventData *Af *** 2891,2914 **** typedef struct AfterTriggerEventData { ! TriggerFlags ate_flags; /* status bits and offset to shared data */ ! ItemPointerData ate_ctid1; /* inserted, deleted, or old updated tuple */ ! ItemPointerData ate_ctid2; /* new updated tuple */ } AfterTriggerEventData; ! /* This struct must exactly match the one above except for not having ctid2 */ ! typedef struct AfterTriggerEventDataOneCtid ! { ! TriggerFlags ate_flags; /* status bits and offset to shared data */ ! ItemPointerData ate_ctid1; /* inserted, deleted, or old updated tuple */ ! } AfterTriggerEventDataOneCtid; ! #define SizeofTriggerEvent(evt) \ ! (((evt)->ate_flags & AFTER_TRIGGER_2CTIDS) ? \ ! sizeof(AfterTriggerEventData) : sizeof(AfterTriggerEventDataOneCtid)) ! #define GetTriggerSharedData(evt) \ ! ((AfterTriggerShared) ((char *) (evt) + ((evt)->ate_flags & AFTER_TRIGGER_OFFSET))) /* * To avoid palloc overhead, we keep trigger events in arrays in successively- --- 2910,2932 ---- typedef struct AfterTriggerEventData { ! char ate_flags; /* flag byte */ ! char ate_data[8]; /* variable length array (0-8 bytes) */ } AfterTriggerEventData; ! /* Macros to help extract the AfterTriggerSharedData index from an event */ ! #define SharedDataFlags(evt) \ ! (((evt)->ate_flags & AFTER_TRIGGER_SHARED_DATA_FLAGS) >> 3) ! #define SizeofSharedDataIndex(evt) \ ! (SharedDataFlags(evt) < 6 ? 0 : (SharedDataFlags(evt) - 5)) ! /* Macros to help extract the CTID from an event */ ! #define CtidFlags(evt) ((evt)->ate_flags & AFTER_TRIGGER_CTID_FLAGS) ! #define SizeofCtid(evt) (CtidFlags(evt) == 7 ? 0 : CtidFlags(evt)) ! /* Size of an encoded event (1-9 bytes) */ ! #define SizeofTriggerEvent(evt) \ ! (1 + SizeofSharedDataIndex(evt) + SizeofCtid(evt)) /* * To avoid palloc overhead, we keep trigger events in arrays in successively- *************** typedef struct AfterTriggerEventList *** 2934,2939 **** --- 2952,2959 ---- AfterTriggerEventChunk *head; AfterTriggerEventChunk *tail; char *tailfree; /* freeptr of tail chunk */ + uint16 last_ats_idx; /* Shared data index of last event */ + ItemPointerData last_ctid; /* CTID of last event */ } AfterTriggerEventList; /* Macros to help in iterating over a list of events */ *************** typedef struct AfterTriggerEventList *** 2947,2952 **** --- 2967,2975 ---- #define for_each_event_chunk(eptr, cptr, evtlist) \ for_each_chunk(cptr, evtlist) for_each_event(eptr, cptr) + #define AfterTriggerSharedDataIdx(chunk, evtsharedptr) \ + ((uint16) (((AfterTriggerShared) (chunk)->endptr) - evtsharedptr) - 1) + /* * All per-transaction data for the AFTER TRIGGERS module. *************** typedef AfterTriggersData *AfterTriggers *** 3024,3034 **** static AfterTriggers afterTriggers; ! static void AfterTriggerExecute(AfterTriggerEvent event, ! Relation rel, TriggerDesc *trigdesc, ! FmgrInfo *finfo, ! Instrumentation *instr, ! MemoryContext per_tuple_context); static SetConstraintState SetConstraintStateCreate(int numalloc); static SetConstraintState SetConstraintStateCopy(SetConstraintState state); static SetConstraintState SetConstraintStateAddItem(SetConstraintState state, --- 3047,3056 ---- static AfterTriggers afterTriggers; ! static void AfterTriggerExecute(Relation rel, TriggerDesc *trigdesc, ! FmgrInfo *finfo, Instrumentation *instr, ! MemoryContext per_tuple_context, ! TriggerEvent event, Oid tgoid, ItemPointer ctid); static SetConstraintState SetConstraintStateCreate(int numalloc); static SetConstraintState SetConstraintStateCopy(SetConstraintState state); static SetConstraintState SetConstraintStateAddItem(SetConstraintState state, *************** static SetConstraintState SetConstraintS *** 3036,3041 **** --- 3058,3304 ---- /* ---------- + * encodeAfterTriggerEvent() + * + * Encode a trigger event for storage in the AfterTriggerEventList. This + * aims to store the most common events using a single byte, but may take + * up to 9 bytes in some cases. + * + * Returns the number of bytes used to encode the event. + * ---------- + */ + static inline int + encodeAfterTriggerEvent(AfterTriggerEvent evt, + bool row_trigger, + uint16 ats_idx, + ItemPointer ctid, + uint16 prev_ats_idx, + ItemPointer prev_ctid) + { + bool store_ctid = row_trigger && !ItemPointerEquals(ctid, prev_ctid); + int ate_data_idx = 0; + char ats_flags; + char ctid_flags; + + /* + * Work out how to store the shared data index compactly. Try to code for + * the most common cases first, and have them use the minimum space. + * + * If the CTID isn't changing, then a common case is that the shared data + * index is increasing by a small amount (next trigger on the same row). + * If this 'small amount' is less than 6 it will fit in the flag byte. + * In most cases it will be a different trigger, so we can offset by 1, + * under the assumption that the index will not be the same. + */ + if (!store_ctid && ats_idx > prev_ats_idx && ats_idx < prev_ats_idx + 7) + ats_flags = (char) (ats_idx - prev_ats_idx - 1); /* 0-5 */ + + /* + * Else if the CTID is changing, the shared data index will often reset + * back to a small value. If this is less than 6 it will fit in the flag + * byte. + */ + else if (store_ctid && ats_idx < 6) + ats_flags = (char) ats_idx; /* 0-5 */ + + /* + * Else indexes less than 256 require one additional byte. The chunk + * size adapts so that typically not too many shared records are used + * per chunk (typically less than 200) but we can't rely on that. + */ + else if (ats_idx < 256) + { + ats_flags = 6; + evt->ate_data[ate_data_idx++] = (char) ats_idx; + } + + /* + * Otherwise indexes larger than 255 require 2 additional bytes. + */ + else + { + ats_flags = 7; + evt->ate_data[ate_data_idx++] = (char) (ats_idx >> 8); + evt->ate_data[ate_data_idx++] = (char) (ats_idx & 0xFF); + } + + if (store_ctid) + { + /* + * Store the CTID compactly. In this block we know that the CTID has + * changed, but hopefully only by a small amount. + * + * First test for the common case where it has increased by one. We + * simplify the encoding and decoding by discounting the case of it + * crossing into the next block (can that ever really happen?). + */ + BlockNumber blk1 = ItemPointerGetBlockNumber(prev_ctid); + BlockNumber blk2 = ItemPointerGetBlockNumber(ctid); + OffsetNumber off1 = ItemPointerGetOffsetNumber(prev_ctid); + OffsetNumber off2 = ItemPointerGetOffsetNumber(ctid); + + if (blk2 == blk1 && off2 == off1 + 1) + { + ctid_flags = 7; + } + else + { + /* + * XOR the 2 CTIDs together and store the result (MSB first) + * discarding any leading zero bytes. This reduces the space + * needed for nearby CTIDs, for which the high bytes will be + * equal and XOR to zero. + */ + BlockNumber blk_xor = blk1 ^ blk2; + OffsetNumber off_xor = off1 ^ off2; + + /* Number of bytes needed for the encoded CTID (1-6) */ + ctid_flags = blk_xor == 0 ? ((off_xor & 0xFF00) != 0 ? 2 : 1) : + (blk_xor & 0xFF000000) != 0 ? 6 : + (blk_xor & 0xFF0000) != 0 ? 5 : + (blk_xor & 0xFF00) != 0 ? 4 : 3; + + switch (ctid_flags) + { + case 6: + evt->ate_data[ate_data_idx++] = (char) (blk_xor >> 24); + /* fall through to next case */ + case 5: + evt->ate_data[ate_data_idx++] = (char) (blk_xor >> 16); + /* fall through to next case */ + case 4: + evt->ate_data[ate_data_idx++] = (char) (blk_xor >> 8); + /* fall through to next case */ + case 3: + evt->ate_data[ate_data_idx++] = (char) blk_xor; + /* fall through to next case */ + case 2: + evt->ate_data[ate_data_idx++] = (char) (off_xor >> 8); + /* fall through to next case */ + case 1: + evt->ate_data[ate_data_idx++] = (char) off_xor; + break; + default: + /* can't happen */ + break; + } + } + } + else + ctid_flags = 0; /* No CTID needed */ + + evt->ate_flags = (ats_flags << 3) | ctid_flags; + + return ate_data_idx + 1; /* Total size of encoded event */ + } + + /* ---------- + * GetTriggerSharedData() + * + * Returns a pointer to a trigger event's shared data. + * ---------- + */ + static inline AfterTriggerShared + GetTriggerSharedData(AfterTriggerEvent evt, AfterTriggerEventChunk *chunk, + uint16 prev_ats_idx) + { + int shared_data_flags = SharedDataFlags(evt); + uint16 ats_idx; + + if (shared_data_flags < 6) + { + if (CtidFlags(evt) == 0) + /* Next trigger on same row, uses a relative index */ + ats_idx = prev_ats_idx + (uint16) shared_data_flags + 1; + else + /* First trigger on a new row, with index < 6 */ + ats_idx = (uint16) shared_data_flags; + } + else if (shared_data_flags == 6) + { + /* 1-byte shared data index follows flag byte */ + ats_idx = (uint16) evt->ate_data[0]; + } + else /* shared_data_flags == 7 */ + { + /* 2-byte shared data index follow flag byte */ + ats_idx = (((uint16) evt->ate_data[0]) << 8) | evt->ate_data[1]; + } + return ((AfterTriggerShared) chunk->endptr) - ats_idx - 1; + } + + /* ---------- + * GetTriggerCtid() + * + * Get the CTID encoded in an AfterTriggerEvent. + * ---------- + */ + static inline void + GetTriggerCtid(AfterTriggerEvent evt, ItemPointer prev_ctid, ItemPointer ctid) + { + if (CtidFlags(evt) == 0) + { + /* CTID is same as that of previous event */ + ItemPointerCopy(prev_ctid, ctid); + } + else if (CtidFlags(evt) == 7) + { + /* + * CTID is the previous value plus one. Note the encoding used + * explicitly prevents this carrying over into the next block. + */ + ItemPointerSet(ctid, ItemPointerGetBlockNumber(prev_ctid), + ItemPointerGetOffsetNumber(prev_ctid) + 1); + } + else + { + /* + * Values in the range 1-6 indicate an encoded difference from the + * previous CTID. This difference is computed by XORing the two CTIDs + * and storing the result (MSB first) after discarding any leading + * zero bytes. To reproduce the original CTID, we just XOR the + * difference bytes again. + */ + unsigned char *ctid_diffs = ((unsigned char *)evt) + 1 + + SizeofSharedDataIndex(evt); + BlockNumber blkno = ItemPointerGetBlockNumber(prev_ctid); + OffsetNumber offno = ItemPointerGetOffsetNumber(prev_ctid); + + switch (CtidFlags(evt)) + { + case 6: + blkno ^= ((BlockNumber) *ctid_diffs) << 24; + ctid_diffs++; + /* fall through to next case */ + case 5: + blkno ^= ((BlockNumber) *ctid_diffs) << 16; + ctid_diffs++; + /* fall through to next case */ + case 4: + blkno ^= ((BlockNumber) *ctid_diffs) << 8; + ctid_diffs++; + /* fall through to next case */ + case 3: + blkno ^= (BlockNumber) *ctid_diffs; + ctid_diffs++; + /* fall through to next case */ + case 2: + offno ^= ((OffsetNumber) *ctid_diffs) << 8; + ctid_diffs++; + /* fall through to next case */ + case 1: + offno ^= (OffsetNumber) *ctid_diffs; + break; + default: + /* can't happen */ + break; + } + ItemPointerSet(ctid, blkno, offno); + } + } + + + /* ---------- * afterTriggerCheckState() * * Returns true if the trigger event is actually in state DEFERRED. *************** afterTriggerCheckState(AfterTriggerShare *** 3085,3102 **** * ---------- */ static void ! afterTriggerAddEvent(AfterTriggerEventList *events, ! AfterTriggerEvent event, AfterTriggerShared evtshared) { ! Size eventsize = SizeofTriggerEvent(event); Size needed = eventsize + sizeof(AfterTriggerSharedData); AfterTriggerEventChunk *chunk; AfterTriggerShared newshared; ! AfterTriggerEvent newevent; /* * If empty list or not enough room in the tail chunk, make a new chunk. ! * We assume here that a new shared record will always be needed. */ chunk = events->tail; if (chunk == NULL || --- 3348,3368 ---- * ---------- */ static void ! afterTriggerAddEvent(AfterTriggerEventList *events, ItemPointer ctid, ! AfterTriggerShared evtshared) { ! bool row_trigger = (evtshared->ats_event & TRIGGER_EVENT_ROW) != 0; ! Size eventsize = sizeof(AfterTriggerEventData); /* worst case */ Size needed = eventsize + sizeof(AfterTriggerSharedData); AfterTriggerEventChunk *chunk; AfterTriggerShared newshared; ! AfterTriggerEventData newevent; ! uint16 ats_idx; /* * If empty list or not enough room in the tail chunk, make a new chunk. ! * We assume here a worst case event record size and that a new shared ! * record will always be needed. */ chunk = events->tail; if (chunk == NULL || *************** afterTriggerAddEvent(AfterTriggerEventLi *** 3115,3138 **** /* * Chunk size starts at 1KB and is allowed to increase up to 1MB. ! * These numbers are fairly arbitrary, though there is a hard limit at ! * AFTER_TRIGGER_OFFSET; else we couldn't link event records to their ! * shared records using the available space in ate_flags. Another ! * constraint is that if the chunk size gets too huge, the search loop ! * below would get slow given a (not too common) usage pattern with ! * many distinct event types in a chunk. Therefore, we double the ! * preceding chunk size only if there weren't too many shared records ! * in the preceding chunk; otherwise we halve it. This gives us some ! * ability to adapt to the actual usage pattern of the current query ! * while still having large chunk sizes in typical usage. All chunk ! * sizes used should be MAXALIGN multiples, to ensure that the shared ! * records will be aligned safely. */ #define MIN_CHUNK_SIZE 1024 #define MAX_CHUNK_SIZE (1024*1024) ! #if MAX_CHUNK_SIZE > (AFTER_TRIGGER_OFFSET+1) ! #error MAX_CHUNK_SIZE must not exceed AFTER_TRIGGER_OFFSET #endif if (chunk == NULL) --- 3381,3407 ---- /* * Chunk size starts at 1KB and is allowed to increase up to 1MB. ! * These numbers are fairly arbitrary, though there is a hard limit ! * set by the fact that we use 16-bit indexes into the shared data ! * records at the end of the chunk. Another constraint is that if the ! * chunk size gets too huge, the search loop below would get slow ! * given a (not too common) usage pattern with many distinct event ! * types in a chunk. This would also affect the per-event compression ! * used. Therefore, we double the preceding chunk size only if there ! * weren't too many shared records in the preceding chunk; otherwise ! * we halve it. This gives us some ability to adapt to the actual ! * usage pattern of the current query while still having large chunk ! * sizes in typical usage. All chunk sizes used should be MAXALIGN ! * multiples, to ensure that the shared records will be aligned ! * safely. */ #define MIN_CHUNK_SIZE 1024 #define MAX_CHUNK_SIZE (1024*1024) + #define SHARED_DATA_SIZE 16 + #define MAX_SHARED_DATA_COUNT (256*256) ! #if MAX_CHUNK_SIZE > (SHARED_DATA_SIZE * MAX_SHARED_DATA_COUNT) ! #error MAX_CHUNK_SIZE must not exceed SHARED_DATA_SIZE * MAX_SHARED_DATA_COUNT #endif if (chunk == NULL) *************** afterTriggerAddEvent(AfterTriggerEventLi *** 3183,3195 **** newshared->ats_firing_id = 0; /* just to be sure */ chunk->endfree = (char *) newshared; } /* Insert the data */ ! newevent = (AfterTriggerEvent) chunk->freeptr; ! memcpy(newevent, event, eventsize); ! /* ... and link the new event to its shared record */ ! newevent->ate_flags &= ~AFTER_TRIGGER_OFFSET; ! newevent->ate_flags |= (char *) newshared - (char *) newevent; chunk->freeptr += eventsize; events->tailfree = chunk->freeptr; --- 3452,3467 ---- newshared->ats_firing_id = 0; /* just to be sure */ chunk->endfree = (char *) newshared; } + ats_idx = AfterTriggerSharedDataIdx(chunk, newshared); /* Insert the data */ ! eventsize = encodeAfterTriggerEvent(&newevent, row_trigger, ats_idx, ! ctid, events->last_ats_idx, ! &events->last_ctid); ! memcpy(chunk->freeptr, &newevent, eventsize); ! events->last_ats_idx = ats_idx; ! if (row_trigger) ! ItemPointerCopy(ctid, &events->last_ctid); chunk->freeptr += eventsize; events->tailfree = chunk->freeptr; *************** afterTriggerFreeEventList(AfterTriggerEv *** 3215,3220 **** --- 3487,3494 ---- events->head = NULL; events->tail = NULL; events->tailfree = NULL; + events->last_ats_idx = 0; + ItemPointerSet(&events->last_ctid, 0, 1); /* first valid CTID */ } /* ---------- *************** afterTriggerRestoreEventList(AfterTrigge *** 3268,3290 **** * fmgr lookup cache space at the caller level. (For triggers fired at * the end of a query, we can even piggyback on the executor's state.) * - * event: event currently being fired. * rel: open relation for event. * trigdesc: working copy of rel's trigger info. * finfo: array of fmgr lookup cache entries (one per trigger in trigdesc). * instr: array of EXPLAIN ANALYZE instrumentation nodes (one per trigger), * or NULL if no instrumentation is wanted. * per_tuple_context: memory context to call trigger function in. * ---------- */ static void ! AfterTriggerExecute(AfterTriggerEvent event, ! Relation rel, TriggerDesc *trigdesc, FmgrInfo *finfo, Instrumentation *instr, ! MemoryContext per_tuple_context) { ! AfterTriggerShared evtshared = GetTriggerSharedData(event); ! Oid tgoid = evtshared->ats_tgoid; TriggerData LocTriggerData; HeapTupleData tuple1; HeapTupleData tuple2; --- 3542,3565 ---- * fmgr lookup cache space at the caller level. (For triggers fired at * the end of a query, we can even piggyback on the executor's state.) * * rel: open relation for event. * trigdesc: working copy of rel's trigger info. * finfo: array of fmgr lookup cache entries (one per trigger in trigdesc). * instr: array of EXPLAIN ANALYZE instrumentation nodes (one per trigger), * or NULL if no instrumentation is wanted. * per_tuple_context: memory context to call trigger function in. + * event: type of event being fired. + * tgoid: trigger OID of event being fired. + * ctid: CTID for event (ignored for statement-level events). * ---------- */ static void ! AfterTriggerExecute(Relation rel, TriggerDesc *trigdesc, FmgrInfo *finfo, Instrumentation *instr, ! MemoryContext per_tuple_context, ! TriggerEvent event, Oid tgoid, ItemPointer ctid) { ! ItemPointer new_ctid = NULL; TriggerData LocTriggerData; HeapTupleData tuple1; HeapTupleData tuple2; *************** AfterTriggerExecute(AfterTriggerEvent ev *** 3318,3330 **** /* * Fetch the required tuple(s). */ ! if (ItemPointerIsValid(&(event->ate_ctid1))) { ! ItemPointerCopy(&(event->ate_ctid1), &(tuple1.t_self)); if (!heap_fetch(rel, SnapshotAny, &tuple1, &buffer1, false, NULL)) elog(ERROR, "failed to fetch tuple1 for AFTER trigger"); LocTriggerData.tg_trigtuple = &tuple1; LocTriggerData.tg_trigtuplebuf = buffer1; } else { --- 3593,3609 ---- /* * Fetch the required tuple(s). */ ! if ((event & TRIGGER_EVENT_ROW) != 0) { ! ItemPointerCopy(ctid, &(tuple1.t_self)); if (!heap_fetch(rel, SnapshotAny, &tuple1, &buffer1, false, NULL)) elog(ERROR, "failed to fetch tuple1 for AFTER trigger"); LocTriggerData.tg_trigtuple = &tuple1; LocTriggerData.tg_trigtuplebuf = buffer1; + + /* for an UPDATE the old tuple points to the new tuple */ + if (TRIGGER_FIRED_BY_UPDATE(event)) + new_ctid = &(tuple1.t_data->t_ctid); } else { *************** AfterTriggerExecute(AfterTriggerEvent ev *** 3332,3342 **** LocTriggerData.tg_trigtuplebuf = InvalidBuffer; } ! /* don't touch ctid2 if not there */ ! if ((event->ate_flags & AFTER_TRIGGER_2CTIDS) && ! ItemPointerIsValid(&(event->ate_ctid2))) { ! ItemPointerCopy(&(event->ate_ctid2), &(tuple2.t_self)); if (!heap_fetch(rel, SnapshotAny, &tuple2, &buffer2, false, NULL)) elog(ERROR, "failed to fetch tuple2 for AFTER trigger"); LocTriggerData.tg_newtuple = &tuple2; --- 3611,3619 ---- LocTriggerData.tg_trigtuplebuf = InvalidBuffer; } ! if (ItemPointerIsValid(new_ctid)) { ! ItemPointerCopy(new_ctid, &(tuple2.t_self)); if (!heap_fetch(rel, SnapshotAny, &tuple2, &buffer2, false, NULL)) elog(ERROR, "failed to fetch tuple2 for AFTER trigger"); LocTriggerData.tg_newtuple = &tuple2; *************** AfterTriggerExecute(AfterTriggerEvent ev *** 3353,3359 **** */ LocTriggerData.type = T_TriggerData; LocTriggerData.tg_event = ! evtshared->ats_event & (TRIGGER_EVENT_OPMASK | TRIGGER_EVENT_ROW); LocTriggerData.tg_relation = rel; MemoryContextReset(per_tuple_context); --- 3630,3636 ---- */ LocTriggerData.type = T_TriggerData; LocTriggerData.tg_event = ! event & (TRIGGER_EVENT_OPMASK | TRIGGER_EVENT_ROW); LocTriggerData.tg_relation = rel; MemoryContextReset(per_tuple_context); *************** afterTriggerMarkEvents(AfterTriggerEvent *** 3409,3420 **** bool found = false; AfterTriggerEvent event; AfterTriggerEventChunk *chunk; for_each_event_chunk(event, chunk, *events) { ! AfterTriggerShared evtshared = GetTriggerSharedData(event); bool defer_it = false; if (!(event->ate_flags & (AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS))) { --- 3686,3705 ---- bool found = false; AfterTriggerEvent event; AfterTriggerEventChunk *chunk; + uint16 prev_ats_idx = 0; + ItemPointerData prev_ctid; + + ItemPointerSet(&prev_ctid, 0, 1); /* first valid CTID */ for_each_event_chunk(event, chunk, *events) { ! AfterTriggerShared evtshared = GetTriggerSharedData(event, chunk, ! prev_ats_idx); ! ItemPointerData ctid; bool defer_it = false; + GetTriggerCtid(event, &prev_ctid, &ctid); + if (!(event->ate_flags & (AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS))) { *************** afterTriggerMarkEvents(AfterTriggerEvent *** 3443,3452 **** if (defer_it && move_list != NULL) { /* add it to move_list */ ! afterTriggerAddEvent(move_list, event, evtshared); /* mark original copy "done" so we don't do it again */ event->ate_flags |= AFTER_TRIGGER_DONE; } } return found; --- 3728,3740 ---- if (defer_it && move_list != NULL) { /* add it to move_list */ ! afterTriggerAddEvent(move_list, &ctid, evtshared); /* mark original copy "done" so we don't do it again */ event->ate_flags |= AFTER_TRIGGER_DONE; } + + prev_ats_idx = AfterTriggerSharedDataIdx(chunk, evtshared); + ItemPointerCopy(&ctid, &prev_ctid); } return found; *************** afterTriggerInvokeEvents(AfterTriggerEve *** 3487,3492 **** --- 3775,3784 ---- TriggerDesc *trigdesc = NULL; FmgrInfo *finfo = NULL; Instrumentation *instr = NULL; + uint16 prev_ats_idx = 0; + ItemPointerData prev_ctid; + + ItemPointerSet(&prev_ctid, 0, 1); /* first valid CTID */ /* Make a local EState if need be */ if (estate == NULL) *************** afterTriggerInvokeEvents(AfterTriggerEve *** 3510,3516 **** for_each_event(event, chunk) { ! AfterTriggerShared evtshared = GetTriggerSharedData(event); /* * Is it one for me to fire? --- 3802,3812 ---- for_each_event(event, chunk) { ! AfterTriggerShared evtshared = GetTriggerSharedData(event, chunk, ! prev_ats_idx); ! ItemPointerData ctid; ! ! GetTriggerCtid(event, &prev_ctid, &ctid); /* * Is it one for me to fire? *************** afterTriggerInvokeEvents(AfterTriggerEve *** 3541,3548 **** * still set, so recursive examinations of the event list * won't try to re-fire it. */ ! AfterTriggerExecute(event, rel, trigdesc, finfo, instr, ! per_tuple_context); /* * Mark the event as done. --- 3837,3845 ---- * still set, so recursive examinations of the event list * won't try to re-fire it. */ ! AfterTriggerExecute(rel, trigdesc, finfo, instr, ! per_tuple_context, evtshared->ats_event, ! evtshared->ats_tgoid, &ctid); /* * Mark the event as done. *************** afterTriggerInvokeEvents(AfterTriggerEve *** 3555,3560 **** --- 3852,3860 ---- /* something remains to be done */ all_fired = all_fired_in_chunk = false; } + + prev_ats_idx = AfterTriggerSharedDataIdx(chunk, evtshared); + ItemPointerCopy(&ctid, &prev_ctid); } /* Clear the chunk if delete_ok and nothing left of interest */ *************** AfterTriggerBeginXact(void) *** 3620,3625 **** --- 3920,3927 ---- afterTriggers->events.head = NULL; afterTriggers->events.tail = NULL; afterTriggers->events.tailfree = NULL; + afterTriggers->events.last_ats_idx = 0; + ItemPointerSet(&afterTriggers->events.last_ctid, 0, 1); /* first valid CTID */ afterTriggers->query_depth = -1; /* We initialize the query stack to a reasonable size */ *************** AfterTriggerBeginQuery(void) *** 3679,3684 **** --- 3981,3988 ---- events->head = NULL; events->tail = NULL; events->tailfree = NULL; + events->last_ats_idx = 0; + ItemPointerSet(&events->last_ctid, 0, 1); /* first valid CTID */ } *************** AfterTriggerEndSubXact(bool isCommit) *** 3921,3926 **** --- 4225,4231 ---- AfterTriggerEvent event; AfterTriggerEventChunk *chunk; CommandId subxact_firing_id; + uint16 prev_ats_idx = 0; /* * Ignore call if the transaction is in aborted state. (Probably *************** AfterTriggerEndSubXact(bool isCommit) *** 3997,4003 **** subxact_firing_id = afterTriggers->firing_stack[my_level]; for_each_event_chunk(event, chunk, afterTriggers->events) { ! AfterTriggerShared evtshared = GetTriggerSharedData(event); if (event->ate_flags & (AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS)) --- 4302,4309 ---- subxact_firing_id = afterTriggers->firing_stack[my_level]; for_each_event_chunk(event, chunk, afterTriggers->events) { ! AfterTriggerShared evtshared = GetTriggerSharedData(event, chunk, ! prev_ats_idx); if (event->ate_flags & (AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS)) *************** AfterTriggerEndSubXact(bool isCommit) *** 4006,4011 **** --- 4312,4318 ---- event->ate_flags &= ~(AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS); } + prev_ats_idx = AfterTriggerSharedDataIdx(chunk, evtshared); } } } *************** AfterTriggerPendingOnRel(Oid relid) *** 4389,4394 **** --- 4696,4702 ---- AfterTriggerEvent event; AfterTriggerEventChunk *chunk; int depth; + uint16 prev_ats_idx = 0; /* No-op if we aren't in a transaction. (Shouldn't happen?) */ if (afterTriggers == NULL) *************** AfterTriggerPendingOnRel(Oid relid) *** 4397,4403 **** /* Scan queued events */ for_each_event_chunk(event, chunk, afterTriggers->events) { ! AfterTriggerShared evtshared = GetTriggerSharedData(event); /* * We can ignore completed events. (Even if a DONE flag is rolled --- 4705,4712 ---- /* Scan queued events */ for_each_event_chunk(event, chunk, afterTriggers->events) { ! AfterTriggerShared evtshared = GetTriggerSharedData(event, chunk, ! prev_ats_idx); /* * We can ignore completed events. (Even if a DONE flag is rolled *************** AfterTriggerPendingOnRel(Oid relid) *** 4409,4414 **** --- 4718,4725 ---- if (evtshared->ats_relid == relid) return true; + + prev_ats_idx = AfterTriggerSharedDataIdx(chunk, evtshared); } /* *************** AfterTriggerPendingOnRel(Oid relid) *** 4418,4432 **** */ for (depth = 0; depth <= afterTriggers->query_depth; depth++) { for_each_event_chunk(event, chunk, afterTriggers->query_stack[depth]) { ! AfterTriggerShared evtshared = GetTriggerSharedData(event); if (event->ate_flags & AFTER_TRIGGER_DONE) continue; if (evtshared->ats_relid == relid) return true; } } --- 4729,4747 ---- */ for (depth = 0; depth <= afterTriggers->query_depth; depth++) { + prev_ats_idx = 0; for_each_event_chunk(event, chunk, afterTriggers->query_stack[depth]) { ! AfterTriggerShared evtshared = GetTriggerSharedData(event, chunk, ! prev_ats_idx); if (event->ate_flags & AFTER_TRIGGER_DONE) continue; if (evtshared->ats_relid == relid) return true; + + prev_ats_idx = AfterTriggerSharedDataIdx(chunk, evtshared); } } *************** AfterTriggerSaveEvent(EState *estate, Re *** 4453,4460 **** { Relation rel = relinfo->ri_RelationDesc; TriggerDesc *trigdesc = relinfo->ri_TrigDesc; - AfterTriggerEventData new_event; AfterTriggerSharedData new_shared; int tgtype_event; int tgtype_level; int i; --- 4768,4775 ---- { Relation rel = relinfo->ri_RelationDesc; TriggerDesc *trigdesc = relinfo->ri_TrigDesc; AfterTriggerSharedData new_shared; + ItemPointer ctid; int tgtype_event; int tgtype_level; int i; *************** AfterTriggerSaveEvent(EState *estate, Re *** 4476,4482 **** * validation is important to make sure we don't walk off the edge of our * arrays. */ - new_event.ate_flags = 0; switch (event) { case TRIGGER_EVENT_INSERT: --- 4791,4796 ---- *************** AfterTriggerSaveEvent(EState *estate, Re *** 4485,4499 **** { Assert(oldtup == NULL); Assert(newtup != NULL); ! ItemPointerCopy(&(newtup->t_self), &(new_event.ate_ctid1)); ! ItemPointerSetInvalid(&(new_event.ate_ctid2)); } else { Assert(oldtup == NULL); Assert(newtup == NULL); ! ItemPointerSetInvalid(&(new_event.ate_ctid1)); ! ItemPointerSetInvalid(&(new_event.ate_ctid2)); } break; case TRIGGER_EVENT_DELETE: --- 4799,4811 ---- { Assert(oldtup == NULL); Assert(newtup != NULL); ! ctid = &newtup->t_self; } else { Assert(oldtup == NULL); Assert(newtup == NULL); ! ctid = NULL; } break; case TRIGGER_EVENT_DELETE: *************** AfterTriggerSaveEvent(EState *estate, Re *** 4502,4516 **** { Assert(oldtup != NULL); Assert(newtup == NULL); ! ItemPointerCopy(&(oldtup->t_self), &(new_event.ate_ctid1)); ! ItemPointerSetInvalid(&(new_event.ate_ctid2)); } else { Assert(oldtup == NULL); Assert(newtup == NULL); ! ItemPointerSetInvalid(&(new_event.ate_ctid1)); ! ItemPointerSetInvalid(&(new_event.ate_ctid2)); } break; case TRIGGER_EVENT_UPDATE: --- 4814,4826 ---- { Assert(oldtup != NULL); Assert(newtup == NULL); ! ctid = &oldtup->t_self; } else { Assert(oldtup == NULL); Assert(newtup == NULL); ! ctid = NULL; } break; case TRIGGER_EVENT_UPDATE: *************** AfterTriggerSaveEvent(EState *estate, Re *** 4519,4546 **** { Assert(oldtup != NULL); Assert(newtup != NULL); ! ItemPointerCopy(&(oldtup->t_self), &(new_event.ate_ctid1)); ! ItemPointerCopy(&(newtup->t_self), &(new_event.ate_ctid2)); ! new_event.ate_flags |= AFTER_TRIGGER_2CTIDS; } else { Assert(oldtup == NULL); Assert(newtup == NULL); ! ItemPointerSetInvalid(&(new_event.ate_ctid1)); ! ItemPointerSetInvalid(&(new_event.ate_ctid2)); } break; case TRIGGER_EVENT_TRUNCATE: tgtype_event = TRIGGER_TYPE_TRUNCATE; Assert(oldtup == NULL); Assert(newtup == NULL); ! ItemPointerSetInvalid(&(new_event.ate_ctid1)); ! ItemPointerSetInvalid(&(new_event.ate_ctid2)); break; default: elog(ERROR, "invalid after-trigger event code: %d", event); tgtype_event = 0; /* keep compiler quiet */ break; } --- 4829,4853 ---- { Assert(oldtup != NULL); Assert(newtup != NULL); ! ctid = &oldtup->t_self; } else { Assert(oldtup == NULL); Assert(newtup == NULL); ! ctid = NULL; } break; case TRIGGER_EVENT_TRUNCATE: tgtype_event = TRIGGER_TYPE_TRUNCATE; Assert(oldtup == NULL); Assert(newtup == NULL); ! ctid = NULL; break; default: elog(ERROR, "invalid after-trigger event code: %d", event); tgtype_event = 0; /* keep compiler quiet */ + ctid = NULL; break; } *************** AfterTriggerSaveEvent(EState *estate, Re *** 4627,4632 **** new_shared.ats_firing_id = 0; afterTriggerAddEvent(&afterTriggers->query_stack[afterTriggers->query_depth], ! &new_event, &new_shared); } } --- 4934,4939 ---- new_shared.ats_firing_id = 0; afterTriggerAddEvent(&afterTriggers->query_stack[afterTriggers->query_depth], ! ctid, &new_shared); } }