diff --git a/src/backend/utils/resowner/resowner.c b/src/backend/utils/resowner/resowner.c index 0955bcc..529209f 100644 --- a/src/backend/utils/resowner/resowner.c +++ b/src/backend/utils/resowner/resowner.c @@ -21,6 +21,7 @@ #include "postgres.h" #include "access/hash.h" +#include "miscadmin.h" #include "storage/predicate.h" #include "storage/proc.h" #include "utils/memutils.h" @@ -113,6 +114,7 @@ typedef struct ResourceOwnerData ResourceOwner CurrentResourceOwner = NULL; ResourceOwner CurTransactionResourceOwner = NULL; ResourceOwner TopTransactionResourceOwner = NULL; +ResourceOwner FirstRootResourceOwner = NULL; /* * List of add-on callbacks for resource releasing @@ -167,6 +169,11 @@ ResourceOwnerCreate(ResourceOwner parent, const char *name) owner->nextchild = parent->firstchild; parent->firstchild = owner; } + else + { + owner->nextchild = FirstRootResourceOwner; + FirstRootResourceOwner = owner; + } return owner; } @@ -442,6 +449,8 @@ ResourceOwnerDelete(ResourceOwner owner) * the owner tree. Better a leak than a crash. */ ResourceOwnerNewParent(owner, NULL); + Assert(FirstRootResourceOwner == owner); + FirstRootResourceOwner = owner->nextchild; /* And free the object. */ if (owner->buffers) @@ -502,6 +511,14 @@ ResourceOwnerNewParent(ResourceOwner owner, } } } + else + { + ResourceOwner *link = &FirstRootResourceOwner; + + while (*link != owner) + link = &((*link)->nextchild); + *link = owner->nextchild; + } if (newparent) { @@ -513,7 +530,8 @@ ResourceOwnerNewParent(ResourceOwner owner, else { owner->parent = NULL; - owner->nextchild = NULL; + owner->nextchild = FirstRootResourceOwner; + FirstRootResourceOwner = owner; } } @@ -1161,6 +1179,59 @@ ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snapshot) } /* + * Invoke a caller-supplied function on every snapshot registered with any + * resource owner in the system. The callback can abort the traversal by + * returning true. + */ +bool +ResourceOwnerWalkAllSnapshots(ResourceWalkSnapshotCallback callback, void *arg) +{ + ResourceOwner owner = FirstRootResourceOwner; + bool visited = false; + + /* + * We do this traversal non-recursively in order to avoid running out of + * stack space, which can otherwise happen with large numbers of nested + * subtransactions. The easiest way to do that is to search depth-first, + * so that we visit all of a given node's descendents before reaching its + * right sibling. When we've visited all of the node's descendents, we'll + * follow the last child's parent pointer back to that node, but visited + * will be set to true at that point, so we'll advance to the right + * sibling instead of traversing it again. + */ + while (owner != NULL) + { + if (!visited) + { + int i; + + for (i = 0; i < owner->nsnapshots; ++i) + if (callback(owner->snapshots[i], arg)) + return true; + + if (owner->firstchild != NULL) + { + owner = owner->firstchild; + continue; + } + } + + if (owner->nextchild != NULL) + { + owner = owner->nextchild; + visited = false; + } + else + { + owner = owner->parent; + visited = true; + } + } + + return false; +} + +/* * Debugging subroutine */ static void diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index d601efe..1fd73c8 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -122,14 +122,28 @@ typedef struct ActiveSnapshotElt /* Top of the stack of active snapshots */ static ActiveSnapshotElt *ActiveSnapshot = NULL; +/* Data for recomputing xmin */ +typedef struct SnapshotXminData +{ + TransactionId new_xmin; + int new_xmin_count; + int snapshots_remaining; +} SnapshotXminData; + +/* How many snapshots is resowner.c tracking for us? */ +static int RegisteredSnapshots = 0; + /* - * How many snapshots is resowner.c tracking for us? - * - * Note: for now, a simple counter is enough. However, if we ever want to be - * smarter about advancing our MyPgXact->xmin we will need to be more - * sophisticated about this, perhaps keeping our own list of snapshots. + * When a cursor is released, it can be advantageous to recompute our + * advertised xmin to avoid system-wide impacts. However, we don't want + * to do this recomputation needlessly. After the first xmin recomputation + * in a particular transaction, these variables will indicate the result of + * the previous recomputation and the number of snapshots which at that time + * had that xmin. The actual number of snapshots with that xmin can be higher + * than the value indicated here, but cannot be lower. */ -static int RegisteredSnapshots = 0; +static TransactionId RegisteredSnapshotOldestXmin = InvalidTransactionId; +static int RegisteredSnapshotOldestXminCount = 0; /* first GetTransactionSnapshot call in a transaction? */ bool FirstSnapshotSet = false; @@ -153,6 +167,7 @@ static List *exportedSnapshots = NIL; static Snapshot CopySnapshot(Snapshot snapshot); static void FreeSnapshot(Snapshot snapshot); +static bool SnapshotResetXminWorker(Snapshot snapshot, void *data); static void SnapshotResetXmin(void); @@ -675,6 +690,8 @@ UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner) ResourceOwnerForgetSnapshot(owner, snapshot); RegisteredSnapshots--; + if (TransactionIdEquals(snapshot->xmin, RegisteredSnapshotOldestXmin)) + RegisteredSnapshotOldestXminCount--; if (--snapshot->regd_count == 0 && snapshot->active_count == 0) { FreeSnapshot(snapshot); @@ -688,12 +705,77 @@ UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner) * If there are no more snapshots, we can reset our PGXACT->xmin to InvalidXid. * Note we can do this without locking because we assume that storing an Xid * is atomic. + * + * Even if there are some remaining snapshots, we may be able to advance our + * PGXACT->xmin to some degree. This typically happens when a portal is + * dropped. For efficiency, we only consider recomputing PGXACT->xmin when + * the active snapshot stack is empty. */ static void SnapshotResetXmin(void) { - if (RegisteredSnapshots == 0 && ActiveSnapshot == NULL) + SnapshotXminData sxd; + ListCell *lc; + + if (ActiveSnapshot != NULL) + return; + + if (RegisteredSnapshots == 0) + { MyPgXact->xmin = InvalidTransactionId; + RegisteredSnapshotOldestXmin = InvalidTransactionId; + RegisteredSnapshotOldestXminCount = 0; + return; + } + + if (RegisteredSnapshotOldestXminCount > 0) + return; + + sxd.new_xmin = InvalidTransactionId; + sxd.new_xmin_count = 0; + sxd.snapshots_remaining = RegisteredSnapshots; + + if (FirstXactSnapshot != NULL) + SnapshotResetXminWorker(FirstXactSnapshot, &sxd); + + foreach(lc, exportedSnapshots) + { + Snapshot snapshot = lfirst(lc); + + SnapshotResetXminWorker(snapshot, &sxd); + } + + ResourceOwnerWalkAllSnapshots(SnapshotResetXminWorker, &sxd); + + if (sxd.snapshots_remaining > 0) + elog(FATAL, "failed to traverse all snapshots"); + + MyPgXact->xmin = sxd.new_xmin; + RegisteredSnapshotOldestXmin = sxd.new_xmin; + RegisteredSnapshotOldestXminCount = sxd.new_xmin_count; +} + +static bool +SnapshotResetXminWorker(Snapshot snapshot, void *data) +{ + SnapshotXminData *sxd = data; + + if (TransactionIdEquals(snapshot->xmin, sxd->new_xmin)) + { + Assert(TransactionIdIsNormal(snapshot->xmin)); + sxd->new_xmin_count++; + } + else if (!TransactionIdIsValid(sxd->new_xmin) + || TransactionIdPrecedes(snapshot->xmin, sxd->new_xmin)) + { + sxd->new_xmin = snapshot->xmin; + sxd->new_xmin_count = 1; + } + + if (--sxd->snapshots_remaining < 0) + elog(FATAL, "traversed too many snapshots"); + + return false; } /* @@ -830,6 +912,8 @@ AtEOXact_Snapshot(bool isCommit) */ ActiveSnapshot = NULL; RegisteredSnapshots = 0; + RegisteredSnapshotOldestXmin = InvalidTransactionId; + RegisteredSnapshotOldestXminCount = 0; CurrentSnapshot = NULL; SecondarySnapshot = NULL; diff --git a/src/include/utils/resowner_private.h b/src/include/utils/resowner_private.h index b8fd1a9..9944144 100644 --- a/src/include/utils/resowner_private.h +++ b/src/include/utils/resowner_private.h @@ -73,6 +73,9 @@ extern void ResourceOwnerRememberSnapshot(ResourceOwner owner, Snapshot snapshot); extern void ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snapshot); +typedef bool (*ResourceWalkSnapshotCallback) (Snapshot snapshot, void *arg); +extern bool ResourceOwnerWalkAllSnapshots(ResourceWalkSnapshotCallback callback, + void *arg); /* support for temporary file management */ extern void ResourceOwnerEnlargeFiles(ResourceOwner owner);