diff --git a/contrib/basic_archive/basic_archive.c b/contrib/basic_archive/basic_archive.c index ed33854c578..e7efbfb9c34 100644 --- a/contrib/basic_archive/basic_archive.c +++ b/contrib/basic_archive/basic_archive.c @@ -281,10 +281,9 @@ basic_archive_file_internal(const char *file, const char *path) /* * Sync the temporary file to disk and move it to its final destination. - * Note that this will overwrite any existing file, but this is only - * possible if someone else created the file since the stat() above. + * This will fail if destination already exists. */ - (void) durable_rename(temp, destination, ERROR); + (void) durable_rename_excl(temp, destination, ERROR); ereport(DEBUG1, (errmsg("archived \"%s\" via basic_archive", file))); diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c index c9344e5e8b4..be21968293c 100644 --- a/src/backend/access/transam/timeline.c +++ b/src/backend/access/transam/timeline.c @@ -439,11 +439,14 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, /* * Now move the completed history file into place with its final name. - * The target file should not exist. */ TLHistoryFilePath(path, newTLI); - Assert(access(path, F_OK) != 0 && errno == ENOENT); - durable_rename(tmppath, path, ERROR); + + /* + * Perform the rename using link if available, paranoidly trying to avoid + * overwriting an existing file (there shouldn't be one). + */ + durable_rename_excl(tmppath, path, ERROR); /* The history file can be archived immediately. */ if (XLogArchivingActive()) @@ -514,11 +517,14 @@ writeTimeLineHistoryFile(TimeLineID tli, char *content, int size) /* * Now move the completed history file into place with its final name. - * The target file should not exist. */ TLHistoryFilePath(path, tli); - Assert(access(path, F_OK) != 0 && errno == ENOENT); - durable_rename(tmppath, path, ERROR); + + /* + * Perform the rename using link if available, paranoidly trying to avoid + * overwriting an existing file (there shouldn't be one). + */ + durable_rename_excl(tmppath, path, ERROR); } /* diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 45c84e3959c..61cda56c6ff 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -3323,12 +3323,14 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, } } - /* The target file should not exist */ - Assert(access(path, F_OK) != 0 && errno == ENOENT); - if (durable_rename(tmppath, path, LOG) != 0) + /* + * Perform the rename using link if available, paranoidly trying to avoid + * overwriting an existing file (there shouldn't be one). + */ + if (durable_rename_excl(tmppath, path, LOG) != 0) { LWLockRelease(ControlFileLock); - /* durable_rename already emitted log message */ + /* durable_rename_excl already emitted log message */ return false; } diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index f904f60c086..24704b6a023 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -807,6 +807,69 @@ durable_unlink(const char *fname, int elevel) return 0; } +/* + * durable_rename_excl -- rename a file in a durable manner. + * + * Similar to durable_rename(), except that this routine tries (but does not + * guarantee) not to overwrite the target file. + * + * Note that a crash in an unfortunate moment can leave you with two links to + * the target file. + * + * Log errors with the caller specified severity. + * + * On Windows, using a hard link followed by unlink() causes concurrency + * issues, while a simple rename() does not cause that, so be careful when + * changing the logic of this routine. + * + * Returns 0 if the operation succeeded, -1 otherwise. Note that errno is not + * valid upon return. + */ +int +durable_rename_excl(const char *oldfile, const char *newfile, int elevel) +{ + /* + * Ensure that, if we crash directly after the rename/link, a file with + * valid contents is moved into place. + */ + if (fsync_fname_ext(oldfile, false, false, elevel) != 0) + return -1; + +#ifdef HAVE_WORKING_LINK + if (link(oldfile, newfile) < 0) + { + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not link file \"%s\" to \"%s\": %m", + oldfile, newfile))); + return -1; + } + unlink(oldfile); +#else + if (rename(oldfile, newfile) < 0) + { + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not rename file \"%s\" to \"%s\": %m", + oldfile, newfile))); + return -1; + } +#endif + + /* + * Make change persistent in case of an OS crash, both the new entry and + * its parent directory need to be flushed. + */ + if (fsync_fname_ext(newfile, false, false, elevel) != 0) + return -1; + + /* Same for parent directory */ + if (fsync_parent_path(newfile, elevel) != 0) + return -1; + + return 0; +} + /* * InitFileAccess --- initialize this module during backend startup * diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h index 830804fdfbf..84ce5a4a5d7 100644 --- a/src/include/pg_config_manual.h +++ b/src/include/pg_config_manual.h @@ -163,6 +163,13 @@ #define USE_BARRIER_SMGRRELEASE #endif +/* + * Define this if your operating system supports link() + */ +#if !defined(WIN32) && !defined(__CYGWIN__) +#define HAVE_WORKING_LINK 1 +#endif + /* * USE_POSIX_FADVISE controls whether Postgres will attempt to use the * posix_fadvise() kernel call. Usually the automatic configure tests are diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 2b4a8e0ffe8..69549b000fa 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -187,6 +187,7 @@ extern void fsync_fname(const char *fname, bool isdir); extern int fsync_fname_ext(const char *fname, bool isdir, bool ignore_perm, int elevel); extern int durable_rename(const char *oldfile, const char *newfile, int loglevel); extern int durable_unlink(const char *fname, int loglevel); +extern int durable_rename_excl(const char *oldfile, const char *newfile, int loglevel); extern void SyncDataDirectory(void); extern int data_sync_elevel(int elevel);