diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index 6f972e630a..066e13a1e3 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -72,6 +72,8 @@ typedef struct HeapCheckContext TransactionId oldest_xid; /* ShmemVariableCache->oldestXid */ FullTransactionId oldest_fxid; /* 64-bit version of oldest_xid, computed * relative to next_fxid */ + TransactionId safe_xmin; /* this XID and newer ones can't become + * all-visible while we're running */ /* * Cached copy of value from MultiXactState @@ -133,8 +135,10 @@ static void check_tuple(HeapCheckContext *ctx); static void check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx); static bool check_tuple_attribute(HeapCheckContext *ctx); -static bool check_tuple_header_and_visibilty(HeapTupleHeader tuphdr, - HeapCheckContext *ctx); +static void check_tuple_header_and_visibilty(HeapTupleHeader tuphdr, + HeapCheckContext *ctx, + bool *tuple_is_readable, + bool *tuple_cannot_die_now); static void report_corruption(HeapCheckContext *ctx, char *msg); static TupleDesc verify_heapam_tupdesc(void); @@ -248,6 +252,12 @@ verify_heapam(PG_FUNCTION_ARGS) memset(&ctx, 0, sizeof(HeapCheckContext)); ctx.cached_xid = InvalidTransactionId; + /* + * Any xmin newer than the xmin of our snapshot can't become all-visible + * while we're running. + */ + ctx.safe_xmin = GetTransactionSnapshot()->xmin; + /* * If we report corruption when not examining some individual attribute, * we need attnum to be reported as NULL. Set that up before any @@ -580,27 +590,33 @@ verify_heapam_tupdesc(void) * other words, we do not return false merely because we detected them.) * * For visibility determination not specifically related to corruption, what we - * want to know is if a tuple is potentially visible to any running - * transaction. 
If you are tempted to replace this function's visibility logic - * with a call to another visibility checking function, keep in mind that this - * function does not update hint bits, as it seems imprudent to write hint bits - * (or anything at all) to a table during a corruption check. Nor does this - * function bother classifying tuple visibility beyond a boolean visible vs. - * not visible. - * - * The caller should already have checked that xmin and xmax are not out of - * bounds for the relation. + * want to know is (1) whether the original inserter committed and (2) + * whether it's possible for the tuple to be pruned while we're still checking + * the relation. If (1) is not the case, then the tuple descriptor used to + * construct the table might include additional columns that we don't + * know about, so we don't try to decode the tuple. If (2) is the case, + * it's OK to check the tuple, but it's not safe to follow any TOAST pointers, + * because if this tuple can be pruned away at any time, the same is true + * of its TOAST chunks. * - * Returns whether the tuple is both visible and sufficiently sensible to - * undergo attribute checks. + * Unlike other visibility-checking functions, this does not update hint bits, + * as it seems imprudent to write hint bits (or anything at all) to a table + * during a corruption check. Nor does this function bother classifying tuple + * visibility beyond answering the questions mentioned above. */ -static bool -check_tuple_header_and_visibilty(HeapTupleHeader tuphdr, HeapCheckContext *ctx) +static void +check_tuple_header_and_visibilty(HeapTupleHeader tuphdr, HeapCheckContext *ctx, + bool *tuple_is_readable, + bool *tuple_cannot_die_now) { uint16 infomask = tuphdr->t_infomask; bool header_garbled = false; unsigned expected_hoff; + /* We haven't proven anything yet.
*/ + *tuple_is_readable = false; + *tuple_cannot_die_now = false; + if (ctx->tuphdr->t_hoff > ctx->lp_len) { report_corruption(ctx, @@ -649,169 +665,153 @@ check_tuple_header_and_visibilty(HeapTupleHeader tuphdr, HeapCheckContext *ctx) } if (header_garbled) - return false; /* checking of this tuple should not continue */ + return; /* checking of this tuple should not continue */ + + /* + * XXX check whether raw xmin is ok by calling get_xid_status, unless + * HeapTupleHeaderXminFrozen in which case we should skip it + */ /* * Ok, we can examine the header for tuple visibility purposes, though we * still need to be careful about a few remaining types of header * corruption. This logic roughly follows that of - * HeapTupleSatisfiesVacuum. Where possible the comments indicate which - * HTSV_Result we think that function might return for this tuple. + * HeapTupleSatisfiesVacuum and similar functions, but we don't need to + * distinguish quite as many cases. */ if (!HeapTupleHeaderXminCommitted(tuphdr)) { - TransactionId raw_xmin = HeapTupleHeaderGetRawXmin(tuphdr); - if (HeapTupleHeaderXminInvalid(tuphdr)) - return false; /* HEAPTUPLE_DEAD */ - /* Used by pre-9.0 binary upgrades */ - else if (infomask & HEAP_MOVED_OFF || - infomask & HEAP_MOVED_IN) + return; + else if (tuphdr->t_infomask & HEAP_MOVED_OFF) { - XidCommitStatus status; TransactionId xvac = HeapTupleHeaderGetXvac(tuphdr); - switch (get_xid_status(xvac, ctx, &status)) + /* XXX sanity check xvac with get_xid_status */ + + /* + * Used by pre-9.0 binary upgrades. It should be impossible for + * xvac to still be running, since we've removed all that code, + * but even if it were, it ought to be safe to read the tuple, + * since the original inserter must have committed. But, if the + * xvac transaction committed, this tuple (and its associated + * TOAST tuples) could be pruned at any time.
+ */ + if (TransactionIdDidCommit(xvac)) { - case XID_INVALID: - report_corruption(ctx, - pstrdup("old-style VACUUM FULL transaction ID is invalid")); - return false; /* corrupt */ - case XID_IN_FUTURE: - report_corruption(ctx, - psprintf("old-style VACUUM FULL transaction ID %u equals or exceeds next valid transaction ID %u:%u", - xvac, - EpochFromFullTransactionId(ctx->next_fxid), - XidFromFullTransactionId(ctx->next_fxid))); - return false; /* corrupt */ - case XID_PRECEDES_RELMIN: - report_corruption(ctx, - psprintf("old-style VACUUM FULL transaction ID %u precedes relation freeze threshold %u:%u", - xvac, - EpochFromFullTransactionId(ctx->relfrozenfxid), - XidFromFullTransactionId(ctx->relfrozenfxid))); - return false; /* corrupt */ - break; - case XID_PRECEDES_CLUSTERMIN: - report_corruption(ctx, - psprintf("old-style VACUUM FULL transaction ID %u precedes oldest valid transaction ID %u:%u", - xvac, - EpochFromFullTransactionId(ctx->oldest_fxid), - XidFromFullTransactionId(ctx->oldest_fxid))); - return false; /* corrupt */ - break; - case XID_BOUNDS_OK: - switch (status) - { - case XID_IN_PROGRESS: - return true; /* HEAPTUPLE_DELETE_IN_PROGRESS */ - case XID_COMMITTED: - case XID_ABORTED: - return false; /* HEAPTUPLE_DEAD */ - } + *tuple_is_readable = true; + return; } } - else + else if (tuphdr->t_infomask & HEAP_MOVED_IN) { - XidCommitStatus status; + TransactionId xvac = HeapTupleHeaderGetXvac(tuphdr); + + /* XXX sanity check xvac with get_xid_status */ - switch (get_xid_status(raw_xmin, ctx, &status)) + /* + * Same as above, but now pruning can happen if xvac did not + * commit. 
+ */ + if (!TransactionIdDidCommit(xvac)) { - case XID_INVALID: - report_corruption(ctx, - pstrdup("raw xmin is invalid")); - return false; - case XID_IN_FUTURE: - report_corruption(ctx, - psprintf("raw xmin %u equals or exceeds next valid transaction ID %u:%u", - raw_xmin, - EpochFromFullTransactionId(ctx->next_fxid), - XidFromFullTransactionId(ctx->next_fxid))); - return false; /* corrupt */ - case XID_PRECEDES_RELMIN: - report_corruption(ctx, - psprintf("raw xmin %u precedes relation freeze threshold %u:%u", - raw_xmin, - EpochFromFullTransactionId(ctx->relfrozenfxid), - XidFromFullTransactionId(ctx->relfrozenfxid))); - return false; /* corrupt */ - case XID_PRECEDES_CLUSTERMIN: - report_corruption(ctx, - psprintf("raw xmin %u precedes oldest valid transaction ID %u:%u", - raw_xmin, - EpochFromFullTransactionId(ctx->oldest_fxid), - XidFromFullTransactionId(ctx->oldest_fxid))); - return false; /* corrupt */ - case XID_BOUNDS_OK: - switch (status) - { - case XID_COMMITTED: - break; - case XID_IN_PROGRESS: - return true; /* insert or delete in progress */ - case XID_ABORTED: - return false; /* HEAPTUPLE_DEAD */ - } + *tuple_is_readable = true; + return; } } + else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuphdr))) + { + /* + * Don't try to check tuples from uncommitted transactions, even + * though technically it might be safe when it's our own + * transaction (since we can see any DDL we did ourselves). + */ + return; + } + else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuphdr))) + { + /* Still running, not us, definitely can't check. */ + return; + } + else if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuphdr))) + { + /* Inserter aborted or crashed, definitely can't check. */ + return; + } + } + + /* + * Okay, the inserter committed, so it was good at some point. Now what + * about the deleting transaction?
+ */ + *tuple_is_readable = true; + + if ((tuphdr->t_infomask & HEAP_XMAX_INVALID) || + HEAP_XMAX_IS_LOCKED_ONLY(tuphdr->t_infomask)) + { + /* + * The tuple is not deleted yet. Even if it gets deleted in the near + * future, it can't be pruned while we're still running because + * it must still be visible to our snapshot. + */ + *tuple_cannot_die_now = true; + return; } - if (!(infomask & HEAP_XMAX_INVALID) && !HEAP_XMAX_IS_LOCKED_ONLY(infomask)) + if (tuphdr->t_infomask & HEAP_XMAX_IS_MULTI) { - if (infomask & HEAP_XMAX_IS_MULTI) + TransactionId xmax = HeapTupleGetUpdateXid(tuphdr); + + /* XXX sanity check the update XID with get_xid_status */ + + if (TransactionIdIsInProgress(xmax)) { - XidCommitStatus status; - TransactionId xmax = HeapTupleGetUpdateXid(tuphdr); + /* + * Since the deleting transaction is still in progress, the + * delete can't be visible to our snapshot. + */ + *tuple_cannot_die_now = true; + return; + } + else if (TransactionIdDidCommit(xmax)) + { + /* + * The update XID is no longer running, and it did commit. So the + * tuple could be pruned if the XID is old enough. 
+ */ + if (TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuphdr), + ctx->safe_xmin)) + return; + } - switch (get_xid_status(xmax, ctx, &status)) - { - /* not LOCKED_ONLY, so it has to have an xmax */ - case XID_INVALID: - report_corruption(ctx, - pstrdup("xmax is invalid")); - return false; /* corrupt */ - case XID_IN_FUTURE: - report_corruption(ctx, - psprintf("xmax %u equals or exceeds next valid transaction ID %u:%u", - xmax, - EpochFromFullTransactionId(ctx->next_fxid), - XidFromFullTransactionId(ctx->next_fxid))); - return false; /* corrupt */ - case XID_PRECEDES_RELMIN: - report_corruption(ctx, - psprintf("xmax %u precedes relation freeze threshold %u:%u", - xmax, - EpochFromFullTransactionId(ctx->relfrozenfxid), - XidFromFullTransactionId(ctx->relfrozenfxid))); - return false; /* corrupt */ - case XID_PRECEDES_CLUSTERMIN: - report_corruption(ctx, - psprintf("xmax %u precedes oldest valid transaction ID %u:%u", - xmax, - EpochFromFullTransactionId(ctx->oldest_fxid), - XidFromFullTransactionId(ctx->oldest_fxid))); - return false; /* corrupt */ - case XID_BOUNDS_OK: - switch (status) - { - case XID_IN_PROGRESS: - return true; /* HEAPTUPLE_DELETE_IN_PROGRESS */ - case XID_COMMITTED: - case XID_ABORTED: - return false; /* HEAPTUPLE_RECENTLY_DEAD or - * HEAPTUPLE_DEAD */ - } - } + *tuple_cannot_die_now = true; + return; + } + + /* XXX sanity check xmax with get_xid_status */ - /* Ok, the tuple is live */ + /* + * Test for cases where there's been an update or delete, but recently + * enough that we don't have to worry about pruning yet. 
+ */ + if (!(tuphdr->t_infomask & HEAP_XMAX_COMMITTED)) + { + if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuphdr))) + { + *tuple_cannot_die_now = true; + return; + } + else if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuphdr))) + { + *tuple_cannot_die_now = true; + return; } - else if (!(infomask & HEAP_XMAX_COMMITTED)) - return true; /* HEAPTUPLE_DELETE_IN_PROGRESS or - * HEAPTUPLE_LIVE */ - else - return false; /* HEAPTUPLE_RECENTLY_DEAD or HEAPTUPLE_DEAD */ } - return true; /* not dead */ + + /* The delete might be old enough that we have to worry about pruning. */ + if (!TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuphdr), + ctx->safe_xmin)) + *tuple_cannot_die_now = true; } /* @@ -1124,6 +1124,8 @@ check_tuple(HeapCheckContext *ctx) TransactionId xmax; bool fatal = false; uint16 infomask = ctx->tuphdr->t_infomask; + bool tuple_is_readable; + bool tuple_cannot_die_now; /* If xmin is normal, it should be within valid range */ xmin = HeapTupleHeaderGetXmin(ctx->tuphdr); @@ -1244,11 +1246,15 @@ check_tuple(HeapCheckContext *ctx) /* * Check various forms of tuple header corruption. If the header is too - * corrupt to continue checking, or if the tuple is not visible to anyone, - * we cannot continue with other checks. + * corrupt to continue checking, or if the inserter aborted, we cannot + * continue with other checks. */ - if (!check_tuple_header_and_visibilty(ctx->tuphdr, ctx)) + check_tuple_header_and_visibilty(ctx->tuphdr, ctx, + &tuple_is_readable, + &tuple_cannot_die_now); + if (!tuple_is_readable) return; + /* XXX skip TOAST checks if tuple_cannot_die_now is false */ /* * The tuple is visible, so it must be compatible with the current version