mirror of
https://github.com/postgres/postgres.git
synced 2025-06-03 00:02:26 -04:00
Fix race condition in relcache init file invalidation.
The previous code tried to synchronize by unlinking the init file twice, but that doesn't actually work: it leaves a window wherein a third process could read the already-stale init file but miss the SI messages that would tell it the data is stale. The result would be bizarre failures in catalog accesses, typically "could not read block 0 in file ..." later during startup. Instead, hold RelCacheInitLock across both the unlink and the sending of the SI messages. This is more straightforward, and might even be a bit faster since only one unlink call is needed. This has been wrong since it was put in (in 2002!), so back-patch to all supported releases.
This commit is contained in:
parent
1bb69245ab
commit
2ada6779c5
@ -1356,10 +1356,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
|
|||||||
* after we send the SI messages. See AtEOXact_Inval()
|
* after we send the SI messages. See AtEOXact_Inval()
|
||||||
*/
|
*/
|
||||||
if (hdr->initfileinval)
|
if (hdr->initfileinval)
|
||||||
RelationCacheInitFileInvalidate(true);
|
RelationCacheInitFilePreInvalidate();
|
||||||
SendSharedInvalidMessages(invalmsgs, hdr->ninvalmsgs);
|
SendSharedInvalidMessages(invalmsgs, hdr->ninvalmsgs);
|
||||||
if (hdr->initfileinval)
|
if (hdr->initfileinval)
|
||||||
RelationCacheInitFileInvalidate(false);
|
RelationCacheInitFilePostInvalidate();
|
||||||
|
|
||||||
/* And now do the callbacks */
|
/* And now do the callbacks */
|
||||||
if (isCommit)
|
if (isCommit)
|
||||||
|
33
src/backend/utils/cache/inval.c
vendored
33
src/backend/utils/cache/inval.c
vendored
@ -854,24 +854,12 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
|
|||||||
return numSharedInvalidMessagesArray;
|
return numSharedInvalidMessagesArray;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define RecoveryRelationCacheInitFileInvalidate(dbo, tbo, tf) \
|
|
||||||
{ \
|
|
||||||
DatabasePath = GetDatabasePath(dbo, tbo); \
|
|
||||||
elog(trace_recovery(DEBUG4), "removing relcache init file in %s", DatabasePath); \
|
|
||||||
RelationCacheInitFileInvalidate(tf); \
|
|
||||||
pfree(DatabasePath); \
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ProcessCommittedInvalidationMessages is executed by xact_redo_commit()
|
* ProcessCommittedInvalidationMessages is executed by xact_redo_commit()
|
||||||
* to process invalidation messages added to commit records.
|
* to process invalidation messages added to commit records.
|
||||||
*
|
*
|
||||||
* Relcache init file invalidation requires processing both
|
* Relcache init file invalidation requires processing both
|
||||||
* before and after we send the SI messages. See AtEOXact_Inval()
|
* before and after we send the SI messages. See AtEOXact_Inval()
|
||||||
*
|
|
||||||
* We deliberately avoid SetDatabasePath() since it is intended to be used
|
|
||||||
* only once by normal backends, so we set DatabasePath directly then
|
|
||||||
* pfree after use. See RecoveryRelationCacheInitFileInvalidate() macro.
|
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
|
ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
|
||||||
@ -885,12 +873,25 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
|
|||||||
(RelcacheInitFileInval ? " and relcache file invalidation" : ""));
|
(RelcacheInitFileInval ? " and relcache file invalidation" : ""));
|
||||||
|
|
||||||
if (RelcacheInitFileInval)
|
if (RelcacheInitFileInval)
|
||||||
RecoveryRelationCacheInitFileInvalidate(dbid, tsid, true);
|
{
|
||||||
|
/*
|
||||||
|
* RelationCacheInitFilePreInvalidate requires DatabasePath to be set,
|
||||||
|
* but we should not use SetDatabasePath during recovery, since it is
|
||||||
|
* intended to be used only once by normal backends. Hence, a quick
|
||||||
|
* hack: set DatabasePath directly then unset after use.
|
||||||
|
*/
|
||||||
|
DatabasePath = GetDatabasePath(dbid, tsid);
|
||||||
|
elog(trace_recovery(DEBUG4), "removing relcache init file in \"%s\"",
|
||||||
|
DatabasePath);
|
||||||
|
RelationCacheInitFilePreInvalidate();
|
||||||
|
pfree(DatabasePath);
|
||||||
|
DatabasePath = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
SendSharedInvalidMessages(msgs, nmsgs);
|
SendSharedInvalidMessages(msgs, nmsgs);
|
||||||
|
|
||||||
if (RelcacheInitFileInval)
|
if (RelcacheInitFileInval)
|
||||||
RecoveryRelationCacheInitFileInvalidate(dbid, tsid, false);
|
RelationCacheInitFilePostInvalidate();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -931,7 +932,7 @@ AtEOXact_Inval(bool isCommit)
|
|||||||
* unless we committed.
|
* unless we committed.
|
||||||
*/
|
*/
|
||||||
if (transInvalInfo->RelcacheInitFileInval)
|
if (transInvalInfo->RelcacheInitFileInval)
|
||||||
RelationCacheInitFileInvalidate(true);
|
RelationCacheInitFilePreInvalidate();
|
||||||
|
|
||||||
AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
|
AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
|
||||||
&transInvalInfo->CurrentCmdInvalidMsgs);
|
&transInvalInfo->CurrentCmdInvalidMsgs);
|
||||||
@ -940,7 +941,7 @@ AtEOXact_Inval(bool isCommit)
|
|||||||
SendSharedInvalidMessages);
|
SendSharedInvalidMessages);
|
||||||
|
|
||||||
if (transInvalInfo->RelcacheInitFileInval)
|
if (transInvalInfo->RelcacheInitFileInval)
|
||||||
RelationCacheInitFileInvalidate(false);
|
RelationCacheInitFilePostInvalidate();
|
||||||
}
|
}
|
||||||
else if (transInvalInfo != NULL)
|
else if (transInvalInfo != NULL)
|
||||||
{
|
{
|
||||||
|
66
src/backend/utils/cache/relcache.c
vendored
66
src/backend/utils/cache/relcache.c
vendored
@ -4377,8 +4377,8 @@ write_relcache_init_file(bool shared)
|
|||||||
* updated by SI message processing, but we can't be sure whether what we
|
* updated by SI message processing, but we can't be sure whether what we
|
||||||
* wrote out was up-to-date.)
|
* wrote out was up-to-date.)
|
||||||
*
|
*
|
||||||
* This mustn't run concurrently with RelationCacheInitFileInvalidate, so
|
* This mustn't run concurrently with the code that unlinks an init file
|
||||||
* grab a serialization lock for the duration.
|
* and sends SI messages, so grab a serialization lock for the duration.
|
||||||
*/
|
*/
|
||||||
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
|
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
|
||||||
|
|
||||||
@ -4442,19 +4442,22 @@ RelationIdIsInInitFile(Oid relationId)
|
|||||||
* changed one or more of the relation cache entries that are kept in the
|
* changed one or more of the relation cache entries that are kept in the
|
||||||
* local init file.
|
* local init file.
|
||||||
*
|
*
|
||||||
* We actually need to remove the init file twice: once just before sending
|
* To be safe against concurrent inspection or rewriting of the init file,
|
||||||
* the SI messages that include relcache inval for such relations, and once
|
* we must take RelCacheInitLock, then remove the old init file, then send
|
||||||
* just after sending them. The unlink before ensures that a backend that's
|
* the SI messages that include relcache inval for such relations, and then
|
||||||
* currently starting cannot read the now-obsolete init file and then miss
|
* release RelCacheInitLock. This serializes the whole affair against
|
||||||
* the SI messages that will force it to update its relcache entries. (This
|
* write_relcache_init_file, so that we can be sure that any other process
|
||||||
* works because the backend startup sequence gets into the PGPROC array before
|
* that's concurrently trying to create a new init file won't move an
|
||||||
* trying to load the init file.) The unlink after is to synchronize with a
|
* already-stale version into place after we unlink. Also, because we unlink
|
||||||
* backend that may currently be trying to write an init file based on data
|
* before sending the SI messages, a backend that's currently starting cannot
|
||||||
* that we've just rendered invalid. Such a backend will see the SI messages,
|
* read the now-obsolete init file and then miss the SI messages that will
|
||||||
* but we can't leave the init file sitting around to fool later backends.
|
* force it to update its relcache entries. (This works because the backend
|
||||||
|
* startup sequence gets into the sinval array before trying to load the init
|
||||||
|
* file.)
|
||||||
*
|
*
|
||||||
* Ignore any failure to unlink the file, since it might not be there if
|
* We take the lock and do the unlink in RelationCacheInitFilePreInvalidate,
|
||||||
* no backend has been started since the last removal.
|
* then release the lock in RelationCacheInitFilePostInvalidate. Caller must
|
||||||
|
* send any pending SI messages between those calls.
|
||||||
*
|
*
|
||||||
* Notice this deals only with the local init file, not the shared init file.
|
* Notice this deals only with the local init file, not the shared init file.
|
||||||
* The reason is that there can never be a "significant" change to the
|
* The reason is that there can never be a "significant" change to the
|
||||||
@ -4464,34 +4467,37 @@ RelationIdIsInInitFile(Oid relationId)
|
|||||||
* be invalid enough to make it necessary to remove it.
|
* be invalid enough to make it necessary to remove it.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
RelationCacheInitFileInvalidate(bool beforeSend)
|
RelationCacheInitFilePreInvalidate(void)
|
||||||
{
|
{
|
||||||
char initfilename[MAXPGPATH];
|
char initfilename[MAXPGPATH];
|
||||||
|
|
||||||
snprintf(initfilename, sizeof(initfilename), "%s/%s",
|
snprintf(initfilename, sizeof(initfilename), "%s/%s",
|
||||||
DatabasePath, RELCACHE_INIT_FILENAME);
|
DatabasePath, RELCACHE_INIT_FILENAME);
|
||||||
|
|
||||||
if (beforeSend)
|
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
|
||||||
{
|
|
||||||
/* no interlock needed here */
|
if (unlink(initfilename) < 0)
|
||||||
unlink(initfilename);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* We need to interlock this against write_relcache_init_file, to
|
* The file might not be there if no backend has been started since
|
||||||
* guard against possibility that someone renames a new-but-
|
* the last removal. But complain about failures other than ENOENT.
|
||||||
* already-obsolete init file into place just after we unlink. With
|
* Fortunately, it's not too late to abort the transaction if we
|
||||||
* the interlock, it's certain that write_relcache_init_file will
|
* can't get rid of the would-be-obsolete init file.
|
||||||
* notice our SI inval message before renaming into place, or else
|
|
||||||
* that we will execute second and successfully unlink the file.
|
|
||||||
*/
|
*/
|
||||||
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
|
if (errno != ENOENT)
|
||||||
unlink(initfilename);
|
ereport(ERROR,
|
||||||
LWLockRelease(RelCacheInitLock);
|
(errcode_for_file_access(),
|
||||||
|
errmsg("could not remove cache file \"%s\": %m",
|
||||||
|
initfilename)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
RelationCacheInitFilePostInvalidate(void)
|
||||||
|
{
|
||||||
|
LWLockRelease(RelCacheInitLock);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Remove the init files during postmaster startup.
|
* Remove the init files during postmaster startup.
|
||||||
*
|
*
|
||||||
|
@ -98,7 +98,8 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
|
|||||||
* Routines to help manage rebuilding of relcache init files
|
* Routines to help manage rebuilding of relcache init files
|
||||||
*/
|
*/
|
||||||
extern bool RelationIdIsInInitFile(Oid relationId);
|
extern bool RelationIdIsInInitFile(Oid relationId);
|
||||||
extern void RelationCacheInitFileInvalidate(bool beforeSend);
|
extern void RelationCacheInitFilePreInvalidate(void);
|
||||||
|
extern void RelationCacheInitFilePostInvalidate(void);
|
||||||
extern void RelationCacheInitFileRemove(void);
|
extern void RelationCacheInitFileRemove(void);
|
||||||
|
|
||||||
/* should be used only by relcache.c and catcache.c */
|
/* should be used only by relcache.c and catcache.c */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user