mirror of
https://github.com/postgres/postgres.git
synced 2025-05-25 00:04:05 -04:00
The old coding was using a regular snapshot, referenced elsewhere, that was subject to having its command counter updated. Fix by creating a private copy of the snapshot exclusively for the cursor. Backpatch to 8.4, which is when the bug was introduced during the snapshot management rewrite.
546 lines
14 KiB
C
546 lines
14 KiB
C
/*-------------------------------------------------------------------------
 * snapmgr.c
 *		PostgreSQL snapshot manager
 *
 * We keep track of snapshots in two ways: those "registered" by resowner.c,
 * and the "active snapshot" stack.  All snapshots in either of them live in
 * persistent memory.  When a snapshot is no longer in any of these lists
 * (tracked by separate refcounts on each snapshot), its memory can be freed.
 *
 * These arrangements let us reset MyProc->xmin when there are no snapshots
 * referenced by this transaction.  (One possible improvement would be to be
 * able to advance Xmin when the snapshot with the earliest Xmin is no longer
 * referenced.  That's a bit harder though, it requires more locking, and
 * anyway it should be rather uncommon to keep snapshots referenced for too
 * long.)
 *
 *
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/utils/time/snapmgr.c,v 1.10.2.1 2009/10/02 17:58:21 alvherre Exp $
 *
 *-------------------------------------------------------------------------
 */
|
|
#include "postgres.h"
|
|
|
|
#include "access/transam.h"
|
|
#include "access/xact.h"
|
|
#include "storage/proc.h"
|
|
#include "storage/procarray.h"
|
|
#include "utils/memutils.h"
|
|
#include "utils/memutils.h"
|
|
#include "utils/resowner.h"
|
|
#include "utils/snapmgr.h"
|
|
#include "utils/tqual.h"
|
|
|
|
|
|
/*
|
|
* CurrentSnapshot points to the only snapshot taken in a serializable
|
|
* transaction, and to the latest one taken in a read-committed transaction.
|
|
* SecondarySnapshot is a snapshot that's always up-to-date as of the current
|
|
* instant, even on a serializable transaction. It should only be used for
|
|
* special-purpose code (say, RI checking.)
|
|
*
|
|
* These SnapshotData structs are static to simplify memory allocation
|
|
* (see the hack in GetSnapshotData to avoid repeated malloc/free).
|
|
*/
|
|
static SnapshotData CurrentSnapshotData = {HeapTupleSatisfiesMVCC};
|
|
static SnapshotData SecondarySnapshotData = {HeapTupleSatisfiesMVCC};
|
|
|
|
/* Pointers to valid snapshots */
|
|
static Snapshot CurrentSnapshot = NULL;
|
|
static Snapshot SecondarySnapshot = NULL;
|
|
|
|
/*
|
|
* These are updated by GetSnapshotData. We initialize them this way
|
|
* for the convenience of TransactionIdIsInProgress: even in bootstrap
|
|
* mode, we don't want it to say that BootstrapTransactionId is in progress.
|
|
*
|
|
* RecentGlobalXmin is initialized to InvalidTransactionId, to ensure that no
|
|
* one tries to use a stale value. Readers should ensure that it has been set
|
|
* to something else before using it.
|
|
*/
|
|
TransactionId TransactionXmin = FirstNormalTransactionId;
|
|
TransactionId RecentXmin = FirstNormalTransactionId;
|
|
TransactionId RecentGlobalXmin = InvalidTransactionId;
|
|
|
|
/*
|
|
* Elements of the active snapshot stack.
|
|
*
|
|
* Each element here accounts for exactly one active_count on SnapshotData.
|
|
*
|
|
* NB: the code assumes that elements in this list are in non-increasing
|
|
* order of as_level; also, the list must be NULL-terminated.
|
|
*/
|
|
typedef struct ActiveSnapshotElt
|
|
{
|
|
Snapshot as_snap;
|
|
int as_level;
|
|
struct ActiveSnapshotElt *as_next;
|
|
} ActiveSnapshotElt;
|
|
|
|
/* Top of the stack of active snapshots */
|
|
static ActiveSnapshotElt *ActiveSnapshot = NULL;
|
|
|
|
/*
 * How many snapshots is resowner.c tracking for us?
 *
 * The counter is bumped by RegisterSnapshotOnOwner, dropped by
 * UnregisterSnapshotFromOwner, and zeroed by AtEOXact_Snapshot.
 *
 * Note: for now, a simple counter is enough.  However, if we ever want to be
 * smarter about advancing our MyProc->xmin we will need to be more
 * sophisticated about this, perhaps keeping our own list of snapshots.
 */
static int	RegisteredSnapshots = 0;
|
|
|
|
/*
 * Has GetTransactionSnapshot already taken this transaction's first
 * snapshot?  Set there, cleared again by AtEOXact_Snapshot.
 */
bool		FirstSnapshotSet = false;
|
|
|
|
/*
 * Remembers whether this transaction registered a serializable snapshot at
 * start.  We cannot trust FirstSnapshotSet in combination with
 * IsXactIsoLevelSerializable, because GUC may be reset before us.
 *
 * Set by GetTransactionSnapshot; cleared by AtEarlyCommit_Snapshot and
 * AtEOXact_Snapshot.
 */
static bool registered_serializable = false;
|
|
|
|
|
|
/* Prototypes for file-local helpers that are used before their definitions */
static void FreeSnapshot(Snapshot snapshot);
static void SnapshotResetXmin(void);
|
|
|
|
|
|
/*
|
|
* GetTransactionSnapshot
|
|
* Get the appropriate snapshot for a new query in a transaction.
|
|
*
|
|
* Note that the return value may point at static storage that will be modified
|
|
* by future calls and by CommandCounterIncrement(). Callers should call
|
|
* RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be
|
|
* used very long.
|
|
*/
|
|
Snapshot
|
|
GetTransactionSnapshot(void)
|
|
{
|
|
/* First call in transaction? */
|
|
if (!FirstSnapshotSet)
|
|
{
|
|
Assert(RegisteredSnapshots == 0);
|
|
|
|
CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
|
|
FirstSnapshotSet = true;
|
|
|
|
/*
|
|
* In serializable mode, the first snapshot must live until end of
|
|
* xact regardless of what the caller does with it, so we must
|
|
* register it internally here and unregister it at end of xact.
|
|
*/
|
|
if (IsXactIsoLevelSerializable)
|
|
{
|
|
CurrentSnapshot = RegisterSnapshotOnOwner(CurrentSnapshot,
|
|
TopTransactionResourceOwner);
|
|
registered_serializable = true;
|
|
}
|
|
|
|
return CurrentSnapshot;
|
|
}
|
|
|
|
if (IsXactIsoLevelSerializable)
|
|
return CurrentSnapshot;
|
|
|
|
CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
|
|
|
|
return CurrentSnapshot;
|
|
}
|
|
|
|
/*
|
|
* GetLatestSnapshot
|
|
* Get a snapshot that is up-to-date as of the current instant,
|
|
* even if we are executing in SERIALIZABLE mode.
|
|
*/
|
|
Snapshot
|
|
GetLatestSnapshot(void)
|
|
{
|
|
/* Should not be first call in transaction */
|
|
if (!FirstSnapshotSet)
|
|
elog(ERROR, "no snapshot has been set");
|
|
|
|
SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData);
|
|
|
|
return SecondarySnapshot;
|
|
}
|
|
|
|
/*
|
|
* SnapshotSetCommandId
|
|
* Propagate CommandCounterIncrement into the static snapshots, if set
|
|
*/
|
|
void
|
|
SnapshotSetCommandId(CommandId curcid)
|
|
{
|
|
if (!FirstSnapshotSet)
|
|
return;
|
|
|
|
if (CurrentSnapshot)
|
|
CurrentSnapshot->curcid = curcid;
|
|
if (SecondarySnapshot)
|
|
SecondarySnapshot->curcid = curcid;
|
|
}
|
|
|
|
/*
|
|
* CopySnapshot
|
|
* Copy the given snapshot.
|
|
*
|
|
* The copy is palloc'd in TopTransactionContext and has initial refcounts set
|
|
* to 0. The returned snapshot has the copied flag set.
|
|
*/
|
|
Snapshot
|
|
CopySnapshot(Snapshot snapshot)
|
|
{
|
|
Snapshot newsnap;
|
|
Size subxipoff;
|
|
Size size;
|
|
|
|
Assert(snapshot != InvalidSnapshot);
|
|
|
|
/* We allocate any XID arrays needed in the same palloc block. */
|
|
size = subxipoff = sizeof(SnapshotData) +
|
|
snapshot->xcnt * sizeof(TransactionId);
|
|
if (snapshot->subxcnt > 0)
|
|
size += snapshot->subxcnt * sizeof(TransactionId);
|
|
|
|
newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size);
|
|
memcpy(newsnap, snapshot, sizeof(SnapshotData));
|
|
|
|
newsnap->regd_count = 0;
|
|
newsnap->active_count = 0;
|
|
newsnap->copied = true;
|
|
|
|
/* setup XID array */
|
|
if (snapshot->xcnt > 0)
|
|
{
|
|
newsnap->xip = (TransactionId *) (newsnap + 1);
|
|
memcpy(newsnap->xip, snapshot->xip,
|
|
snapshot->xcnt * sizeof(TransactionId));
|
|
}
|
|
else
|
|
newsnap->xip = NULL;
|
|
|
|
/* setup subXID array */
|
|
if (snapshot->subxcnt > 0)
|
|
{
|
|
newsnap->subxip = (TransactionId *) ((char *) newsnap + subxipoff);
|
|
memcpy(newsnap->subxip, snapshot->subxip,
|
|
snapshot->subxcnt * sizeof(TransactionId));
|
|
}
|
|
else
|
|
newsnap->subxip = NULL;
|
|
|
|
return newsnap;
|
|
}
|
|
|
|
/*
|
|
* FreeSnapshot
|
|
* Free the memory associated with a snapshot.
|
|
*/
|
|
static void
|
|
FreeSnapshot(Snapshot snapshot)
|
|
{
|
|
Assert(snapshot->regd_count == 0);
|
|
Assert(snapshot->active_count == 0);
|
|
Assert(snapshot->copied);
|
|
|
|
pfree(snapshot);
|
|
}
|
|
|
|
/*
|
|
* PushActiveSnapshot
|
|
* Set the given snapshot as the current active snapshot
|
|
*
|
|
* If this is the first use of this snapshot, create a new long-lived copy with
|
|
* active refcount=1. Otherwise, only increment the refcount.
|
|
*/
|
|
void
|
|
PushActiveSnapshot(Snapshot snap)
|
|
{
|
|
ActiveSnapshotElt *newactive;
|
|
|
|
Assert(snap != InvalidSnapshot);
|
|
|
|
newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt));
|
|
/* Static snapshot? Create a persistent copy */
|
|
newactive->as_snap = snap->copied ? snap : CopySnapshot(snap);
|
|
newactive->as_next = ActiveSnapshot;
|
|
newactive->as_level = GetCurrentTransactionNestLevel();
|
|
|
|
newactive->as_snap->active_count++;
|
|
|
|
ActiveSnapshot = newactive;
|
|
}
|
|
|
|
/*
|
|
* PushUpdatedSnapshot
|
|
* As above, except we set the snapshot's CID to the current CID.
|
|
*/
|
|
void
|
|
PushUpdatedSnapshot(Snapshot snapshot)
|
|
{
|
|
Snapshot newsnap;
|
|
|
|
/*
|
|
* We cannot risk modifying a snapshot that's possibly already used
|
|
* elsewhere, so make a new copy to scribble on.
|
|
*/
|
|
newsnap = CopySnapshot(snapshot);
|
|
newsnap->curcid = GetCurrentCommandId(false);
|
|
|
|
PushActiveSnapshot(newsnap);
|
|
}
|
|
|
|
/*
|
|
* PopActiveSnapshot
|
|
*
|
|
* Remove the topmost snapshot from the active snapshot stack, decrementing the
|
|
* reference count, and free it if this was the last reference.
|
|
*/
|
|
void
|
|
PopActiveSnapshot(void)
|
|
{
|
|
ActiveSnapshotElt *newstack;
|
|
|
|
newstack = ActiveSnapshot->as_next;
|
|
|
|
Assert(ActiveSnapshot->as_snap->active_count > 0);
|
|
|
|
ActiveSnapshot->as_snap->active_count--;
|
|
|
|
if (ActiveSnapshot->as_snap->active_count == 0 &&
|
|
ActiveSnapshot->as_snap->regd_count == 0)
|
|
FreeSnapshot(ActiveSnapshot->as_snap);
|
|
|
|
pfree(ActiveSnapshot);
|
|
ActiveSnapshot = newstack;
|
|
|
|
SnapshotResetXmin();
|
|
}
|
|
|
|
/*
|
|
* GetActiveSnapshot
|
|
* Return the topmost snapshot in the Active stack.
|
|
*/
|
|
Snapshot
|
|
GetActiveSnapshot(void)
|
|
{
|
|
Assert(ActiveSnapshot != NULL);
|
|
|
|
return ActiveSnapshot->as_snap;
|
|
}
|
|
|
|
/*
|
|
* ActiveSnapshotSet
|
|
* Return whether there is at least one snapshot in the Active stack
|
|
*/
|
|
bool
|
|
ActiveSnapshotSet(void)
|
|
{
|
|
return ActiveSnapshot != NULL;
|
|
}
|
|
|
|
/*
|
|
* RegisterSnapshot
|
|
* Register a snapshot as being in use by the current resource owner
|
|
*
|
|
* If InvalidSnapshot is passed, it is not registered.
|
|
*/
|
|
Snapshot
|
|
RegisterSnapshot(Snapshot snapshot)
|
|
{
|
|
if (snapshot == InvalidSnapshot)
|
|
return InvalidSnapshot;
|
|
|
|
return RegisterSnapshotOnOwner(snapshot, CurrentResourceOwner);
|
|
}
|
|
|
|
/*
|
|
* RegisterSnapshotOnOwner
|
|
* As above, but use the specified resource owner
|
|
*/
|
|
Snapshot
|
|
RegisterSnapshotOnOwner(Snapshot snapshot, ResourceOwner owner)
|
|
{
|
|
Snapshot snap;
|
|
|
|
if (snapshot == InvalidSnapshot)
|
|
return InvalidSnapshot;
|
|
|
|
/* Static snapshot? Create a persistent copy */
|
|
snap = snapshot->copied ? snapshot : CopySnapshot(snapshot);
|
|
|
|
/* and tell resowner.c about it */
|
|
ResourceOwnerEnlargeSnapshots(owner);
|
|
snap->regd_count++;
|
|
ResourceOwnerRememberSnapshot(owner, snap);
|
|
|
|
RegisteredSnapshots++;
|
|
|
|
return snap;
|
|
}
|
|
|
|
/*
|
|
* UnregisterSnapshot
|
|
*
|
|
* Decrement the reference count of a snapshot, remove the corresponding
|
|
* reference from CurrentResourceOwner, and free the snapshot if no more
|
|
* references remain.
|
|
*/
|
|
void
|
|
UnregisterSnapshot(Snapshot snapshot)
|
|
{
|
|
if (snapshot == NULL)
|
|
return;
|
|
|
|
UnregisterSnapshotFromOwner(snapshot, CurrentResourceOwner);
|
|
}
|
|
|
|
/*
|
|
* UnregisterSnapshotFromOwner
|
|
* As above, but use the specified resource owner
|
|
*/
|
|
void
|
|
UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner)
|
|
{
|
|
if (snapshot == NULL)
|
|
return;
|
|
|
|
Assert(snapshot->regd_count > 0);
|
|
Assert(RegisteredSnapshots > 0);
|
|
|
|
ResourceOwnerForgetSnapshot(owner, snapshot);
|
|
RegisteredSnapshots--;
|
|
if (--snapshot->regd_count == 0 && snapshot->active_count == 0)
|
|
{
|
|
FreeSnapshot(snapshot);
|
|
SnapshotResetXmin();
|
|
}
|
|
}
|
|
|
|
/*
|
|
* SnapshotResetXmin
|
|
*
|
|
* If there are no more snapshots, we can reset our PGPROC->xmin to InvalidXid.
|
|
* Note we can do this without locking because we assume that storing an Xid
|
|
* is atomic.
|
|
*/
|
|
static void
|
|
SnapshotResetXmin(void)
|
|
{
|
|
if (RegisteredSnapshots == 0 && ActiveSnapshot == NULL)
|
|
MyProc->xmin = InvalidTransactionId;
|
|
}
|
|
|
|
/*
|
|
* AtSubCommit_Snapshot
|
|
*/
|
|
void
|
|
AtSubCommit_Snapshot(int level)
|
|
{
|
|
ActiveSnapshotElt *active;
|
|
|
|
/*
|
|
* Relabel the active snapshots set in this subtransaction as though they
|
|
* are owned by the parent subxact.
|
|
*/
|
|
for (active = ActiveSnapshot; active != NULL; active = active->as_next)
|
|
{
|
|
if (active->as_level < level)
|
|
break;
|
|
active->as_level = level - 1;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* AtSubAbort_Snapshot
|
|
* Clean up snapshots after a subtransaction abort
|
|
*/
|
|
void
|
|
AtSubAbort_Snapshot(int level)
|
|
{
|
|
/* Forget the active snapshots set by this subtransaction */
|
|
while (ActiveSnapshot && ActiveSnapshot->as_level >= level)
|
|
{
|
|
ActiveSnapshotElt *next;
|
|
|
|
next = ActiveSnapshot->as_next;
|
|
|
|
/*
|
|
* Decrement the snapshot's active count. If it's still registered or
|
|
* marked as active by an outer subtransaction, we can't free it yet.
|
|
*/
|
|
Assert(ActiveSnapshot->as_snap->active_count >= 1);
|
|
ActiveSnapshot->as_snap->active_count -= 1;
|
|
|
|
if (ActiveSnapshot->as_snap->active_count == 0 &&
|
|
ActiveSnapshot->as_snap->regd_count == 0)
|
|
FreeSnapshot(ActiveSnapshot->as_snap);
|
|
|
|
/* and free the stack element */
|
|
pfree(ActiveSnapshot);
|
|
|
|
ActiveSnapshot = next;
|
|
}
|
|
|
|
SnapshotResetXmin();
|
|
}
|
|
|
|
/*
|
|
* AtEarlyCommit_Snapshot
|
|
*
|
|
* Snapshot manager's cleanup function, to be called on commit, before
|
|
* doing resowner.c resource release.
|
|
*/
|
|
void
|
|
AtEarlyCommit_Snapshot(void)
|
|
{
|
|
/*
|
|
* On a serializable transaction we must unregister our private refcount
|
|
* to the serializable snapshot.
|
|
*/
|
|
if (registered_serializable)
|
|
UnregisterSnapshotFromOwner(CurrentSnapshot,
|
|
TopTransactionResourceOwner);
|
|
registered_serializable = false;
|
|
|
|
}
|
|
|
|
/*
|
|
* AtEOXact_Snapshot
|
|
* Snapshot manager's cleanup function for end of transaction
|
|
*/
|
|
void
|
|
AtEOXact_Snapshot(bool isCommit)
|
|
{
|
|
/* On commit, complain about leftover snapshots */
|
|
if (isCommit)
|
|
{
|
|
ActiveSnapshotElt *active;
|
|
|
|
if (RegisteredSnapshots != 0)
|
|
elog(WARNING, "%d registered snapshots seem to remain after cleanup",
|
|
RegisteredSnapshots);
|
|
|
|
/* complain about unpopped active snapshots */
|
|
for (active = ActiveSnapshot; active != NULL; active = active->as_next)
|
|
elog(WARNING, "snapshot %p still active", active);
|
|
}
|
|
|
|
/*
|
|
* And reset our state. We don't need to free the memory explicitly --
|
|
* it'll go away with TopTransactionContext.
|
|
*/
|
|
ActiveSnapshot = NULL;
|
|
RegisteredSnapshots = 0;
|
|
|
|
CurrentSnapshot = NULL;
|
|
SecondarySnapshot = NULL;
|
|
|
|
FirstSnapshotSet = false;
|
|
registered_serializable = false;
|
|
}
|