Fix race condition in relcache init file invalidation.

The previous code tried to synchronize by unlinking the init file twice,
but that doesn't actually work: it leaves a window wherein a third process
could read the already-stale init file but miss the SI messages that would
tell it the data is stale.  The result would be bizarre failures in catalog
accesses, typically "could not read block 0 in file ..." later during
startup.

Instead, hold RelCacheInitLock across both the unlink and the sending of
the SI messages.  This is more straightforward, and might even be a bit
faster since only one unlink call is needed.

This has been wrong since it was put in (in 2002!), so back-patch to all
supported releases.
This commit is contained in:
Tom Lane 2011-08-16 13:11:54 -04:00
parent 1bb69245ab
commit 2ada6779c5
4 changed files with 57 additions and 49 deletions

View File

@ -1356,10 +1356,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
* after we send the SI messages. See AtEOXact_Inval() * after we send the SI messages. See AtEOXact_Inval()
*/ */
if (hdr->initfileinval) if (hdr->initfileinval)
RelationCacheInitFileInvalidate(true); RelationCacheInitFilePreInvalidate();
SendSharedInvalidMessages(invalmsgs, hdr->ninvalmsgs); SendSharedInvalidMessages(invalmsgs, hdr->ninvalmsgs);
if (hdr->initfileinval) if (hdr->initfileinval)
RelationCacheInitFileInvalidate(false); RelationCacheInitFilePostInvalidate();
/* And now do the callbacks */ /* And now do the callbacks */
if (isCommit) if (isCommit)

View File

@ -854,24 +854,12 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
return numSharedInvalidMessagesArray; return numSharedInvalidMessagesArray;
} }
#define RecoveryRelationCacheInitFileInvalidate(dbo, tbo, tf) \
{ \
DatabasePath = GetDatabasePath(dbo, tbo); \
elog(trace_recovery(DEBUG4), "removing relcache init file in %s", DatabasePath); \
RelationCacheInitFileInvalidate(tf); \
pfree(DatabasePath); \
}
/* /*
* ProcessCommittedInvalidationMessages is executed by xact_redo_commit() * ProcessCommittedInvalidationMessages is executed by xact_redo_commit()
* to process invalidation messages added to commit records. * to process invalidation messages added to commit records.
* *
* Relcache init file invalidation requires processing both * Relcache init file invalidation requires processing both
* before and after we send the SI messages. See AtEOXact_Inval() * before and after we send the SI messages. See AtEOXact_Inval()
*
* We deliberately avoid SetDatabasePath() since it is intended to be used
* only once by normal backends, so we set DatabasePath directly then
* pfree after use. See RecoveryRelationCacheInitFileInvalidate() macro.
*/ */
void void
ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs, ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
@ -885,12 +873,25 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
(RelcacheInitFileInval ? " and relcache file invalidation" : "")); (RelcacheInitFileInval ? " and relcache file invalidation" : ""));
if (RelcacheInitFileInval) if (RelcacheInitFileInval)
RecoveryRelationCacheInitFileInvalidate(dbid, tsid, true); {
/*
* RelationCacheInitFilePreInvalidate requires DatabasePath to be set,
* but we should not use SetDatabasePath during recovery, since it is
* intended to be used only once by normal backends. Hence, a quick
* hack: set DatabasePath directly then unset after use.
*/
DatabasePath = GetDatabasePath(dbid, tsid);
elog(trace_recovery(DEBUG4), "removing relcache init file in \"%s\"",
DatabasePath);
RelationCacheInitFilePreInvalidate();
pfree(DatabasePath);
DatabasePath = NULL;
}
SendSharedInvalidMessages(msgs, nmsgs); SendSharedInvalidMessages(msgs, nmsgs);
if (RelcacheInitFileInval) if (RelcacheInitFileInval)
RecoveryRelationCacheInitFileInvalidate(dbid, tsid, false); RelationCacheInitFilePostInvalidate();
} }
/* /*
@ -931,7 +932,7 @@ AtEOXact_Inval(bool isCommit)
* unless we committed. * unless we committed.
*/ */
if (transInvalInfo->RelcacheInitFileInval) if (transInvalInfo->RelcacheInitFileInval)
RelationCacheInitFileInvalidate(true); RelationCacheInitFilePreInvalidate();
AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs, AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
&transInvalInfo->CurrentCmdInvalidMsgs); &transInvalInfo->CurrentCmdInvalidMsgs);
@ -940,7 +941,7 @@ AtEOXact_Inval(bool isCommit)
SendSharedInvalidMessages); SendSharedInvalidMessages);
if (transInvalInfo->RelcacheInitFileInval) if (transInvalInfo->RelcacheInitFileInval)
RelationCacheInitFileInvalidate(false); RelationCacheInitFilePostInvalidate();
} }
else if (transInvalInfo != NULL) else if (transInvalInfo != NULL)
{ {

View File

@ -4377,8 +4377,8 @@ write_relcache_init_file(bool shared)
* updated by SI message processing, but we can't be sure whether what we * updated by SI message processing, but we can't be sure whether what we
* wrote out was up-to-date.) * wrote out was up-to-date.)
* *
* This mustn't run concurrently with RelationCacheInitFileInvalidate, so * This mustn't run concurrently with the code that unlinks an init file
* grab a serialization lock for the duration. * and sends SI messages, so grab a serialization lock for the duration.
*/ */
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE); LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
@ -4442,19 +4442,22 @@ RelationIdIsInInitFile(Oid relationId)
* changed one or more of the relation cache entries that are kept in the * changed one or more of the relation cache entries that are kept in the
* local init file. * local init file.
* *
* We actually need to remove the init file twice: once just before sending * To be safe against concurrent inspection or rewriting of the init file,
* the SI messages that include relcache inval for such relations, and once * we must take RelCacheInitLock, then remove the old init file, then send
* just after sending them. The unlink before ensures that a backend that's * the SI messages that include relcache inval for such relations, and then
* currently starting cannot read the now-obsolete init file and then miss * release RelCacheInitLock. This serializes the whole affair against
* the SI messages that will force it to update its relcache entries. (This * write_relcache_init_file, so that we can be sure that any other process
* works because the backend startup sequence gets into the PGPROC array before * that's concurrently trying to create a new init file won't move an
* trying to load the init file.) The unlink after is to synchronize with a * already-stale version into place after we unlink. Also, because we unlink
* backend that may currently be trying to write an init file based on data * before sending the SI messages, a backend that's currently starting cannot
* that we've just rendered invalid. Such a backend will see the SI messages, * read the now-obsolete init file and then miss the SI messages that will
* but we can't leave the init file sitting around to fool later backends. * force it to update its relcache entries. (This works because the backend
* startup sequence gets into the sinval array before trying to load the init
* file.)
* *
* Ignore any failure to unlink the file, since it might not be there if * We take the lock and do the unlink in RelationCacheInitFilePreInvalidate,
* no backend has been started since the last removal. * then release the lock in RelationCacheInitFilePostInvalidate. Caller must
* send any pending SI messages between those calls.
* *
* Notice this deals only with the local init file, not the shared init file. * Notice this deals only with the local init file, not the shared init file.
* The reason is that there can never be a "significant" change to the * The reason is that there can never be a "significant" change to the
@ -4464,34 +4467,37 @@ RelationIdIsInInitFile(Oid relationId)
* be invalid enough to make it necessary to remove it. * be invalid enough to make it necessary to remove it.
*/ */
void void
RelationCacheInitFileInvalidate(bool beforeSend) RelationCacheInitFilePreInvalidate(void)
{ {
char initfilename[MAXPGPATH]; char initfilename[MAXPGPATH];
snprintf(initfilename, sizeof(initfilename), "%s/%s", snprintf(initfilename, sizeof(initfilename), "%s/%s",
DatabasePath, RELCACHE_INIT_FILENAME); DatabasePath, RELCACHE_INIT_FILENAME);
if (beforeSend) LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE);
{
/* no interlock needed here */ if (unlink(initfilename) < 0)
unlink(initfilename);
}
else
{ {
/* /*
* We need to interlock this against write_relcache_init_file, to * The file might not be there if no backend has been started since
* guard against possibility that someone renames a new-but- * the last removal. But complain about failures other than ENOENT.
* already-obsolete init file into place just after we unlink. With * Fortunately, it's not too late to abort the transaction if we
* the interlock, it's certain that write_relcache_init_file will * can't get rid of the would-be-obsolete init file.
* notice our SI inval message before renaming into place, or else
* that we will execute second and successfully unlink the file.
*/ */
LWLockAcquire(RelCacheInitLock, LW_EXCLUSIVE); if (errno != ENOENT)
unlink(initfilename); ereport(ERROR,
LWLockRelease(RelCacheInitLock); (errcode_for_file_access(),
errmsg("could not remove cache file \"%s\": %m",
initfilename)));
} }
} }
void
RelationCacheInitFilePostInvalidate(void)
{
LWLockRelease(RelCacheInitLock);
}
/* /*
* Remove the init files during postmaster startup. * Remove the init files during postmaster startup.
* *

View File

@ -98,7 +98,8 @@ extern void AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
* Routines to help manage rebuilding of relcache init files * Routines to help manage rebuilding of relcache init files
*/ */
extern bool RelationIdIsInInitFile(Oid relationId); extern bool RelationIdIsInInitFile(Oid relationId);
extern void RelationCacheInitFileInvalidate(bool beforeSend); extern void RelationCacheInitFilePreInvalidate(void);
extern void RelationCacheInitFilePostInvalidate(void);
extern void RelationCacheInitFileRemove(void); extern void RelationCacheInitFileRemove(void);
/* should be used only by relcache.c and catcache.c */ /* should be used only by relcache.c and catcache.c */