mirror of
https://github.com/postgres/postgres.git
synced 2025-05-12 00:01:58 -04:00
Don't ERROR on PreallocXlogFiles() race condition.
Before a restartpoint finishes PreallocXlogFiles(), a startup process KeepFileRestoredFromArchive() call can unlink the preallocated segment. If a CHECKPOINT sql command had elicited the restartpoint experiencing the race condition, that sql command failed. Moreover, the restartpoint omitted its log_checkpoints message and some inessential resource reclamation. Prevent the ERROR by skipping open() of the segment. Since these consequences are so minor, no back-patch. This commit has been applied as of 2b3e4672f760 in v15 and newer versions. This is required on stable branches of v13 and v14 to fix a regression reported by Noah Misch, introduced by 1f95181b44c8, causing spurious failures in archive recovery (neither streaming nor archive recovery) with concurrent restartpoints. The backpatched versions of the patches have been aligned on these branches by me, Noah Misch is the author. Tests have been conducted by the both of us. Reported-by: Arun Thirupathi Author: Noah Misch <noah@leadboat.com> Discussion: https://postgr.es/m/20210202151416.GB3304930@rfd.leadboat.com Discussion: https://postgr.es/m/20250306193013.36.nmisch@google.com Backpatch-through: 13
This commit is contained in:
parent
e77d9cd4f5
commit
cbed472a93
@ -2440,7 +2440,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
|
||||
bool ispartialpage;
|
||||
bool last_iteration;
|
||||
bool finishing_seg;
|
||||
bool added;
|
||||
int curridx;
|
||||
int npages;
|
||||
int startidx;
|
||||
@ -2507,7 +2506,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
|
||||
wal_segment_size);
|
||||
|
||||
/* create/use new log file */
|
||||
openLogFile = XLogFileInit(openLogSegNo, &added);
|
||||
openLogFile = XLogFileInit(openLogSegNo);
|
||||
ReserveExternalFD();
|
||||
}
|
||||
|
||||
@ -3253,23 +3252,21 @@ XLogNeedsFlush(XLogRecPtr record)
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a new XLOG file segment, or open a pre-existing one.
|
||||
* Try to make a given XLOG file segment exist.
|
||||
*
|
||||
* logsegno: identify segment to be created/opened.
|
||||
* logsegno: identify segment.
|
||||
*
|
||||
* *added: on return, true if this call raised the number of extant segments.
|
||||
*
|
||||
* Returns FD of opened file.
|
||||
* path: on return, this char[MAXPGPATH] has the path to the logsegno file.
|
||||
*
|
||||
* Note: errors here are ERROR not PANIC because we might or might not be
|
||||
* inside a critical section (eg, during checkpoint there is no reason to
|
||||
* take down the system on failure). They will promote to PANIC if we are
|
||||
* in a critical section.
|
||||
* Returns -1 or FD of opened file. A -1 here is not an error; a caller
|
||||
* wanting an open segment should attempt to open "path", which usually will
|
||||
* succeed. (This is weird, but it's efficient for the callers.)
|
||||
*/
|
||||
int
|
||||
XLogFileInit(XLogSegNo logsegno, bool *added)
|
||||
static int
|
||||
XLogFileInitInternal(XLogSegNo logsegno, bool *added, char *path)
|
||||
{
|
||||
char path[MAXPGPATH];
|
||||
char tmppath[MAXPGPATH];
|
||||
PGAlignedXLogBlock zbuffer;
|
||||
XLogSegNo installed_segno;
|
||||
@ -3407,26 +3404,53 @@ XLogFileInit(XLogSegNo logsegno, bool *added)
|
||||
*/
|
||||
max_segno = logsegno + CheckPointSegments;
|
||||
if (InstallXLogFileSegment(&installed_segno, tmppath, true, max_segno))
|
||||
{
|
||||
*added = true;
|
||||
elog(DEBUG2, "done creating and filling new WAL file");
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* No need for any more future segments, or InstallXLogFileSegment()
|
||||
* failed to rename the file into place. If the rename failed, opening
|
||||
* the file below will fail.
|
||||
* failed to rename the file into place. If the rename failed, a
|
||||
* caller opening the file may fail.
|
||||
*/
|
||||
unlink(tmppath);
|
||||
elog(DEBUG2, "abandoned new WAL file");
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a new XLOG file segment, or open a pre-existing one.
|
||||
*
|
||||
* logsegno: identify segment to be created/opened.
|
||||
*
|
||||
* Returns FD of opened file.
|
||||
*
|
||||
* Note: errors here are ERROR not PANIC because we might or might not be
|
||||
* inside a critical section (eg, during checkpoint there is no reason to
|
||||
* take down the system on failure). They will promote to PANIC if we are
|
||||
* in a critical section.
|
||||
*/
|
||||
int
|
||||
XLogFileInit(XLogSegNo logsegno)
|
||||
{
|
||||
bool ignore_added;
|
||||
char path[MAXPGPATH];
|
||||
int fd;
|
||||
|
||||
fd = XLogFileInitInternal(logsegno, &ignore_added, path);
|
||||
if (fd >= 0)
|
||||
return fd;
|
||||
|
||||
/* Now open original target segment (might not be file I just made) */
|
||||
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | get_sync_bit(sync_method));
|
||||
if (fd < 0)
|
||||
ereport(ERROR,
|
||||
(errcode_for_file_access(),
|
||||
errmsg("could not open file \"%s\": %m", path)));
|
||||
|
||||
elog(DEBUG2, "done creating and filling new WAL file");
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
@ -3883,6 +3907,15 @@ XLogFileClose(void)
|
||||
* High-volume systems will be OK once they've built up a sufficient set of
|
||||
* recycled log segments, but the startup transient is likely to include
|
||||
* a lot of segment creations by foreground processes, which is not so good.
|
||||
*
|
||||
* XLogFileInitInternal() can ereport(ERROR). All known causes indicate big
|
||||
* trouble; for example, a full filesystem is one cause. The checkpoint WAL
|
||||
* and/or ControlFile updates already completed. If a RequestCheckpoint()
|
||||
* initiated the present checkpoint and an ERROR ends this function, the
|
||||
* command that called RequestCheckpoint() fails. That's not ideal, but it's
|
||||
* not worth contorting more functions to use caller-specified elevel values.
|
||||
* (With or without RequestCheckpoint(), an ERROR forestalls some inessential
|
||||
* reporting and resource reclamation.)
|
||||
*/
|
||||
static void
|
||||
PreallocXlogFiles(XLogRecPtr endptr)
|
||||
@ -3890,6 +3923,7 @@ PreallocXlogFiles(XLogRecPtr endptr)
|
||||
XLogSegNo _logSegNo;
|
||||
int lf;
|
||||
bool added;
|
||||
char path[MAXPGPATH];
|
||||
uint64 offset;
|
||||
|
||||
XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
|
||||
@ -3897,8 +3931,9 @@ PreallocXlogFiles(XLogRecPtr endptr)
|
||||
if (offset >= (uint32) (0.75 * wal_segment_size))
|
||||
{
|
||||
_logSegNo++;
|
||||
lf = XLogFileInit(_logSegNo, &added);
|
||||
close(lf);
|
||||
lf = XLogFileInitInternal(_logSegNo, &added, path);
|
||||
if (lf >= 0)
|
||||
close(lf);
|
||||
if (added)
|
||||
CheckpointStats.ckpt_segs_added++;
|
||||
}
|
||||
@ -5212,7 +5247,6 @@ BootStrapXLOG(void)
|
||||
XLogLongPageHeader longpage;
|
||||
XLogRecord *record;
|
||||
char *recptr;
|
||||
bool added;
|
||||
uint64 sysidentifier;
|
||||
struct timeval tv;
|
||||
pg_crc32c crc;
|
||||
@ -5309,7 +5343,7 @@ BootStrapXLOG(void)
|
||||
record->xl_crc = crc;
|
||||
|
||||
/* Create first XLOG segment file */
|
||||
openLogFile = XLogFileInit(1, &added);
|
||||
openLogFile = XLogFileInit(1);
|
||||
|
||||
/*
|
||||
* We needn't bother with Reserve/ReleaseExternalFD here, since we'll
|
||||
@ -5615,10 +5649,9 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
|
||||
* The switch happened at a segment boundary, so just create the next
|
||||
* segment on the new timeline.
|
||||
*/
|
||||
bool added;
|
||||
int fd;
|
||||
|
||||
fd = XLogFileInit(startLogSegNo, &added);
|
||||
fd = XLogFileInit(startLogSegNo);
|
||||
|
||||
if (close(fd) != 0)
|
||||
{
|
||||
|
@ -918,11 +918,9 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
|
||||
|
||||
if (recvFile < 0)
|
||||
{
|
||||
bool added = true;
|
||||
|
||||
/* Create/use new log file */
|
||||
XLByteToSeg(recptr, recvSegNo, wal_segment_size);
|
||||
recvFile = XLogFileInit(recvSegNo, &added);
|
||||
recvFile = XLogFileInit(recvSegNo);
|
||||
recvFileTLI = ThisTimeLineID;
|
||||
}
|
||||
|
||||
|
@ -286,7 +286,7 @@ extern XLogRecPtr XLogInsertRecord(struct XLogRecData *rdata,
|
||||
extern void XLogFlush(XLogRecPtr RecPtr);
|
||||
extern bool XLogBackgroundFlush(void);
|
||||
extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
|
||||
extern int XLogFileInit(XLogSegNo segno, bool *added);
|
||||
extern int XLogFileInit(XLogSegNo segno);
|
||||
extern int XLogFileOpen(XLogSegNo segno);
|
||||
|
||||
extern void CheckXLogRemoved(XLogSegNo segno, TimeLineID tli);
|
||||
|
Loading…
x
Reference in New Issue
Block a user