Compare commits

...

5 Commits

Author SHA1 Message Date
Jeff Davis
00d7fb5e2e Assert that buffers are marked dirty before XLogRegisterBuffer().
Enforce the rule from transam/README in XLogRegisterBuffer(), and
update callers to follow the rule.

Hash indexes sometimes register clean pages as a part of the locking
protocol, so provide a REGBUF_NO_CHANGE flag to support that use.

Discussion: https://postgr.es/m/c84114f8-c7f1-5b57-f85a-3adc31e1a904@iki.fi
Reviewed-by: Heikki Linnakangas
2023-10-23 17:17:46 -07:00
Michael Paquier
befe9451fb doc: Improve example query related to pg_wait_events
Author: Pavel Luzanov
Discussion: https://postgr.es/m/4f79be75-7e30-4817-b0da-4a691ea5427f@postgrespro.ru
2023-10-24 08:40:43 +09:00
Michael Paquier
9972c7de1d Fix typos in wait_event.c
Noticed while working on a different patch.  Introduced in af720b4c50a1.
2023-10-24 08:05:29 +09:00
Robert Haas
5b36e8f078 Change struct tablespaceinfo's oid member from 'char *' to 'Oid'
This shouldn't change behavior except in the unusual case where
there are files in the tablespace directory that have entirely
numeric names but are nevertheless not possible names for a
tablespace directory, either because their names have leading zeroes
that shouldn't be there, or the value is actually zero, or because
the value is too large to represent as an OID.

In those cases, the directory would previously have made it into
the list of tablespaceinfo objects and no longer will. Thus, base
backups will now ignore such directories, instead of treating them
as legitimate tablespace directories. Similarly, if entries for
such tablespaces occur in a tablespace_map file, they will now
be rejected as erroneous, instead of being honored.

This is infrastructure for future work that wants to be able to
know the tablespace of each relation that is part of a backup
*as an OID*. By strengthening the up-front validation, we don't
have to worry about weird cases later, and can more easily avoid
repeated string->integer conversions.

Patch by me, reviewed by David Steele.

Discussion: http://postgr.es/m/CA+TgmoZNVeBzoqDL8xvr-nkaepq815jtDR4nJzPew7=3iEuM1g@mail.gmail.com
2023-10-23 15:17:26 -04:00
Robert Haas
5c47c6546c Refactor parse_filename_for_nontemp_relation to parse more.
Instead of returning the number of characters in the RelFileNumber,
return the RelFileNumber itself. Continue to return the fork number,
as before, and additionally return the segment number.

parse_filename_for_nontemp_relation now rejects a RelFileNumber or
segment number that begins with a leading zero. Before, we accepted
such cases as relation filenames, but if we continued to do so after
this change, the function might return the same values for two
different files (e.g. 1234.5 and 001234.5 or 1234.005) which could be
annoying for callers. Since we don't actually ever generate filenames
with leading zeroes in the names, any such files that we find must
have been created by something other than PostgreSQL, and it is
therefore reasonable to treat them as non-relation files.

Along the way, change unlogged_relation_entry to store a RelFileNumber
rather than an OID. This update should have been made in
851f4cc75cdd8c831f1baa9a7abf8c8248b65890, but it was overlooked.
It's trivial to make the update as part of this commit, perhaps more
trivial than it would have been without it, so do that.

Patch by me, reviewed by David Steele.

Discussion: http://postgr.es/m/CA+TgmoZNVeBzoqDL8xvr-nkaepq815jtDR4nJzPew7=3iEuM1g@mail.gmail.com
2023-10-23 15:08:53 -04:00
21 changed files with 264 additions and 117 deletions

View File

@ -1119,7 +1119,7 @@ SELECT a.pid, a.wait_event, w.description
FROM pg_stat_activity a JOIN FROM pg_stat_activity a JOIN
pg_wait_events w ON (a.wait_event_type = w.type AND pg_wait_events w ON (a.wait_event_type = w.type AND
a.wait_event = w.name) a.wait_event = w.name)
WHERE wait_event is NOT NULL and a.state = 'active'; WHERE a.wait_event is NOT NULL and a.state = 'active';
-[ RECORD 1 ]------------------------------------------------------&zwsp;------------ -[ RECORD 1 ]------------------------------------------------------&zwsp;------------
pid | 686674 pid | 686674
wait_event | WALInitSync wait_event | WALInitSync

View File

@ -387,24 +387,22 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
START_CRIT_SECTION(); START_CRIT_SECTION();
if (RelationNeedsWAL(btree->index) && !btree->isBuild) if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogBeginInsert(); XLogBeginInsert();
XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
if (BufferIsValid(childbuf))
XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
}
/* Perform the page update, and register any extra WAL data */ /*
* Perform the page update, dirty and register stack->buffer, and
* register any extra WAL data.
*/
btree->execPlaceToPage(btree, stack->buffer, stack, btree->execPlaceToPage(btree, stack->buffer, stack,
insertdata, updateblkno, ptp_workspace); insertdata, updateblkno, ptp_workspace);
MarkBufferDirty(stack->buffer);
/* An insert to an internal page finishes the split of the child. */ /* An insert to an internal page finishes the split of the child. */
if (BufferIsValid(childbuf)) if (BufferIsValid(childbuf))
{ {
GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
MarkBufferDirty(childbuf); MarkBufferDirty(childbuf);
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
} }
if (RelationNeedsWAL(btree->index) && !btree->isBuild) if (RelationNeedsWAL(btree->index) && !btree->isBuild)

View File

@ -721,9 +721,12 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
/* Apply changes to page */ /* Apply changes to page */
dataPlaceToPageLeafRecompress(buf, leaf); dataPlaceToPageLeafRecompress(buf, leaf);
MarkBufferDirty(buf);
/* If needed, register WAL data built by computeLeafRecompressWALData */ /* If needed, register WAL data built by computeLeafRecompressWALData */
if (RelationNeedsWAL(btree->index) && !btree->isBuild) if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{ {
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen); XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
} }
} }
@ -1155,6 +1158,8 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
pitem = (PostingItem *) insertdata; pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off); GinDataPageAddPostingItem(page, pitem, off);
MarkBufferDirty(buf);
if (RelationNeedsWAL(btree->index) && !btree->isBuild) if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{ {
/* /*
@ -1167,6 +1172,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
data.offset = off; data.offset = off;
data.newitem = *pitem; data.newitem = *pitem;
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
XLogRegisterBufData(0, (char *) &data, XLogRegisterBufData(0, (char *) &data,
sizeof(ginxlogInsertDataInternal)); sizeof(ginxlogInsertDataInternal));
} }

View File

@ -571,6 +571,8 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
elog(ERROR, "failed to add item to index page in \"%s\"", elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index)); RelationGetRelationName(btree->index));
MarkBufferDirty(buf);
if (RelationNeedsWAL(btree->index) && !btree->isBuild) if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{ {
/* /*
@ -583,6 +585,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
data.isDelete = insertData->isDelete; data.isDelete = insertData->isDelete;
data.offset = off; data.offset = off;
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
XLogRegisterBufData(0, (char *) &data, XLogRegisterBufData(0, (char *) &data,
offsetof(ginxlogInsertEntry, tuple)); offsetof(ginxlogInsertEntry, tuple));
XLogRegisterBufData(0, (char *) insertData->entry, XLogRegisterBufData(0, (char *) insertData->entry,

View File

@ -397,6 +397,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
} }
Assert((ptr - collectordata) <= collector->sumsize); Assert((ptr - collectordata) <= collector->sumsize);
MarkBufferDirty(buffer);
if (needWal) if (needWal)
{ {
XLogRegisterBuffer(1, buffer, REGBUF_STANDARD); XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
@ -404,8 +407,6 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
} }
metadata->tailFreeSize = PageGetExactFreeSpace(page); metadata->tailFreeSize = PageGetExactFreeSpace(page);
MarkBufferDirty(buffer);
} }
/* /*

View File

@ -824,11 +824,16 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
XLogRegisterData((char *) &xlrec, SizeOfHashDelete); XLogRegisterData((char *) &xlrec, SizeOfHashDelete);
/* /*
* bucket buffer needs to be registered to ensure that we can * bucket buffer was not changed, but still needs to be
* acquire a cleanup lock on it during replay. * registered to ensure that we can acquire a cleanup lock on
* it during replay.
*/ */
if (!xlrec.is_primary_bucket_page) if (!xlrec.is_primary_bucket_page)
XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE); {
uint8 flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
XLogRegisterBuffer(0, bucket_buf, flags);
}
XLogRegisterBuffer(1, buf, REGBUF_STANDARD); XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
XLogRegisterBufData(1, (char *) deletable, XLogRegisterBufData(1, (char *) deletable,

View File

@ -658,11 +658,15 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
XLogRegisterData((char *) &xlrec, SizeOfHashSqueezePage); XLogRegisterData((char *) &xlrec, SizeOfHashSqueezePage);
/* /*
* bucket buffer needs to be registered to ensure that we can acquire * bucket buffer was not changed, but still needs to be registered to
* a cleanup lock on it during replay. * ensure that we can acquire a cleanup lock on it during replay.
*/ */
if (!xlrec.is_prim_bucket_same_wrt) if (!xlrec.is_prim_bucket_same_wrt)
XLogRegisterBuffer(0, bucketbuf, REGBUF_STANDARD | REGBUF_NO_IMAGE); {
uint8 flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
XLogRegisterBuffer(0, bucketbuf, flags);
}
XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD); XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
if (xlrec.ntups > 0) if (xlrec.ntups > 0)
@ -960,11 +964,16 @@ readpage:
XLogRegisterData((char *) &xlrec, SizeOfHashMovePageContents); XLogRegisterData((char *) &xlrec, SizeOfHashMovePageContents);
/* /*
* bucket buffer needs to be registered to ensure that * bucket buffer was not changed, but still needs to
* we can acquire a cleanup lock on it during replay. * be registered to ensure that we can acquire a
* cleanup lock on it during replay.
*/ */
if (!xlrec.is_prim_bucket_same_wrt) if (!xlrec.is_prim_bucket_same_wrt)
XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE); {
int flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
XLogRegisterBuffer(0, bucket_buf, flags);
}
XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD); XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
XLogRegisterBufData(1, (char *) itup_offsets, XLogRegisterBufData(1, (char *) itup_offsets,

View File

@ -8579,9 +8579,22 @@ do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
char *relpath = NULL; char *relpath = NULL;
char *s; char *s;
PGFileType de_type; PGFileType de_type;
char *badp;
Oid tsoid;
/* Skip anything that doesn't look like a tablespace */ /*
if (strspn(de->d_name, "0123456789") != strlen(de->d_name)) * Try to parse the directory name as an unsigned integer.
*
* Tablespace directories should be positive integers that can be
* represented in 32 bits, with no leading zeroes or trailing
* garbage. If we come across a name that doesn't meet those
* criteria, skip it.
*/
if (de->d_name[0] < '1' || de->d_name[1] > '9')
continue;
errno = 0;
tsoid = strtoul(de->d_name, &badp, 10);
if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
continue; continue;
snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name); snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
@ -8656,7 +8669,7 @@ do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
} }
ti = palloc(sizeof(tablespaceinfo)); ti = palloc(sizeof(tablespaceinfo));
ti->oid = pstrdup(de->d_name); ti->oid = tsoid;
ti->path = pstrdup(linkpath); ti->path = pstrdup(linkpath);
ti->rpath = relpath; ti->rpath = relpath;
ti->size = -1; ti->size = -1;

View File

@ -248,6 +248,20 @@ XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Assert(!((flags & REGBUF_FORCE_IMAGE) && (flags & (REGBUF_NO_IMAGE)))); Assert(!((flags & REGBUF_FORCE_IMAGE) && (flags & (REGBUF_NO_IMAGE))));
Assert(begininsert_called); Assert(begininsert_called);
/*
* Ordinarily, buffer should be exclusive-locked and marked dirty before
* we get here, otherwise we could end up violating one of the rules in
* access/transam/README.
*
* Some callers intentionally register a clean page and never update that
* page's LSN; in that case they can pass the flag REGBUF_NO_CHANGE to
* bypass these checks.
*/
#ifdef USE_ASSERT_CHECKING
if (!(flags & REGBUF_NO_CHANGE))
Assert(BufferIsExclusiveLocked(buffer) && BufferIsDirty(buffer));
#endif
if (block_id >= max_registered_block_id) if (block_id >= max_registered_block_id)
{ {
if (block_id >= max_registered_buffers) if (block_id >= max_registered_buffers)
@ -1313,8 +1327,8 @@ log_newpage_range(Relation rel, ForkNumber forknum,
START_CRIT_SECTION(); START_CRIT_SECTION();
for (i = 0; i < nbufs; i++) for (i = 0; i < nbufs; i++)
{ {
XLogRegisterBuffer(i, bufpack[i], flags);
MarkBufferDirty(bufpack[i]); MarkBufferDirty(bufpack[i]);
XLogRegisterBuffer(i, bufpack[i], flags);
} }
recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI); recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);

View File

@ -678,7 +678,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
tablespaceinfo *ti = lfirst(lc); tablespaceinfo *ti = lfirst(lc);
char *linkloc; char *linkloc;
linkloc = psprintf("pg_tblspc/%s", ti->oid); linkloc = psprintf("pg_tblspc/%u", ti->oid);
/* /*
* Remove the existing symlink if any and Create the symlink * Remove the existing symlink if any and Create the symlink
@ -692,7 +692,6 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
errmsg("could not create symbolic link \"%s\": %m", errmsg("could not create symbolic link \"%s\": %m",
linkloc))); linkloc)));
pfree(ti->oid);
pfree(ti->path); pfree(ti->path);
pfree(ti); pfree(ti);
} }
@ -1341,6 +1340,8 @@ read_tablespace_map(List **tablespaces)
{ {
if (!was_backslash && (ch == '\n' || ch == '\r')) if (!was_backslash && (ch == '\n' || ch == '\r'))
{ {
char *endp;
if (i == 0) if (i == 0)
continue; /* \r immediately followed by \n */ continue; /* \r immediately followed by \n */
@ -1360,7 +1361,12 @@ read_tablespace_map(List **tablespaces)
str[n++] = '\0'; str[n++] = '\0';
ti = palloc0(sizeof(tablespaceinfo)); ti = palloc0(sizeof(tablespaceinfo));
ti->oid = pstrdup(str); errno = 0;
ti->oid = strtoul(str, &endp, 10);
if (*endp != '\0' || errno == EINVAL || errno == ERANGE)
ereport(FATAL,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
ti->path = pstrdup(str + n); ti->path = pstrdup(str + n);
*tablespaces = lappend(*tablespaces, ti); *tablespaces = lappend(*tablespaces, ti);

View File

@ -97,7 +97,7 @@ FreeBackupManifest(backup_manifest_info *manifest)
* Add an entry to the backup manifest for a file. * Add an entry to the backup manifest for a file.
*/ */
void void
AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid, AddFileToBackupManifest(backup_manifest_info *manifest, Oid spcoid,
const char *pathname, size_t size, pg_time_t mtime, const char *pathname, size_t size, pg_time_t mtime,
pg_checksum_context *checksum_ctx) pg_checksum_context *checksum_ctx)
{ {
@ -114,9 +114,9 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
* pathname relative to the data directory (ignoring the intermediate * pathname relative to the data directory (ignoring the intermediate
* symlink traversal). * symlink traversal).
*/ */
if (spcoid != NULL) if (OidIsValid(spcoid))
{ {
snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid, snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%u/%s", spcoid,
pathname); pathname);
pathname = pathbuf; pathname = pathbuf;
} }

View File

@ -75,14 +75,15 @@ typedef struct
pg_checksum_type manifest_checksum_type; pg_checksum_type manifest_checksum_type;
} basebackup_options; } basebackup_options;
static int64 sendTablespace(bbsink *sink, char *path, char *spcoid, bool sizeonly, static int64 sendTablespace(bbsink *sink, char *path, Oid spcoid, bool sizeonly,
struct backup_manifest_info *manifest); struct backup_manifest_info *manifest);
static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly, static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
List *tablespaces, bool sendtblspclinks, List *tablespaces, bool sendtblspclinks,
backup_manifest_info *manifest, const char *spcoid); backup_manifest_info *manifest, Oid spcoid);
static bool sendFile(bbsink *sink, const char *readfilename, const char *tarfilename, static bool sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
struct stat *statbuf, bool missing_ok, Oid dboid, struct stat *statbuf, bool missing_ok,
backup_manifest_info *manifest, const char *spcoid); Oid dboid, Oid spcoid,
backup_manifest_info *manifest);
static off_t read_file_data_into_buffer(bbsink *sink, static off_t read_file_data_into_buffer(bbsink *sink,
const char *readfilename, int fd, const char *readfilename, int fd,
off_t offset, size_t length, off_t offset, size_t length,
@ -305,7 +306,7 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
if (tmp->path == NULL) if (tmp->path == NULL)
tmp->size = sendDir(sink, ".", 1, true, state.tablespaces, tmp->size = sendDir(sink, ".", 1, true, state.tablespaces,
true, NULL, NULL); true, NULL, InvalidOid);
else else
tmp->size = sendTablespace(sink, tmp->path, tmp->oid, true, tmp->size = sendTablespace(sink, tmp->path, tmp->oid, true,
NULL); NULL);
@ -346,7 +347,7 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
/* Then the bulk of the files... */ /* Then the bulk of the files... */
sendDir(sink, ".", 1, false, state.tablespaces, sendDir(sink, ".", 1, false, state.tablespaces,
sendtblspclinks, &manifest, NULL); sendtblspclinks, &manifest, InvalidOid);
/* ... and pg_control after everything else. */ /* ... and pg_control after everything else. */
if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0) if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@ -355,11 +356,11 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
errmsg("could not stat file \"%s\": %m", errmsg("could not stat file \"%s\": %m",
XLOG_CONTROL_FILE))); XLOG_CONTROL_FILE)));
sendFile(sink, XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, sendFile(sink, XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf,
false, InvalidOid, &manifest, NULL); false, InvalidOid, InvalidOid, &manifest);
} }
else else
{ {
char *archive_name = psprintf("%s.tar", ti->oid); char *archive_name = psprintf("%u.tar", ti->oid);
bbsink_begin_archive(sink, archive_name); bbsink_begin_archive(sink, archive_name);
@ -623,8 +624,8 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m", pathbuf))); errmsg("could not stat file \"%s\": %m", pathbuf)));
sendFile(sink, pathbuf, pathbuf, &statbuf, false, InvalidOid, sendFile(sink, pathbuf, pathbuf, &statbuf, false,
&manifest, NULL); InvalidOid, InvalidOid, &manifest);
/* unconditionally mark file as archived */ /* unconditionally mark file as archived */
StatusFilePath(pathbuf, fname, ".done"); StatusFilePath(pathbuf, fname, ".done");
@ -1087,7 +1088,7 @@ sendFileWithContent(bbsink *sink, const char *filename, const char *content,
_tarWritePadding(sink, len); _tarWritePadding(sink, len);
AddFileToBackupManifest(manifest, NULL, filename, len, AddFileToBackupManifest(manifest, InvalidOid, filename, len,
(pg_time_t) statbuf.st_mtime, &checksum_ctx); (pg_time_t) statbuf.st_mtime, &checksum_ctx);
} }
@ -1099,7 +1100,7 @@ sendFileWithContent(bbsink *sink, const char *filename, const char *content,
* Only used to send auxiliary tablespaces, not PGDATA. * Only used to send auxiliary tablespaces, not PGDATA.
*/ */
static int64 static int64
sendTablespace(bbsink *sink, char *path, char *spcoid, bool sizeonly, sendTablespace(bbsink *sink, char *path, Oid spcoid, bool sizeonly,
backup_manifest_info *manifest) backup_manifest_info *manifest)
{ {
int64 size; int64 size;
@ -1154,7 +1155,7 @@ sendTablespace(bbsink *sink, char *path, char *spcoid, bool sizeonly,
static int64 static int64
sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly, sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
List *tablespaces, bool sendtblspclinks, backup_manifest_info *manifest, List *tablespaces, bool sendtblspclinks, backup_manifest_info *manifest,
const char *spcoid) Oid spcoid)
{ {
DIR *dir; DIR *dir;
struct dirent *de; struct dirent *de;
@ -1197,9 +1198,9 @@ sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
{ {
int excludeIdx; int excludeIdx;
bool excludeFound; bool excludeFound;
ForkNumber relForkNum; /* Type of fork if file is a relation */ RelFileNumber relNumber;
int relnumchars; /* Chars in filename that are the ForkNumber relForkNum;
* relnumber */ unsigned segno;
/* Skip special stuff */ /* Skip special stuff */
if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
@ -1249,23 +1250,20 @@ sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
/* Exclude all forks for unlogged tables except the init fork */ /* Exclude all forks for unlogged tables except the init fork */
if (isDbDir && if (isDbDir &&
parse_filename_for_nontemp_relation(de->d_name, &relnumchars, parse_filename_for_nontemp_relation(de->d_name, &relNumber,
&relForkNum)) &relForkNum, &segno))
{ {
/* Never exclude init forks */ /* Never exclude init forks */
if (relForkNum != INIT_FORKNUM) if (relForkNum != INIT_FORKNUM)
{ {
char initForkFile[MAXPGPATH]; char initForkFile[MAXPGPATH];
char relNumber[OIDCHARS + 1];
/* /*
* If any other type of fork, check if there is an init fork * If any other type of fork, check if there is an init fork
* with the same RelFileNumber. If so, the file can be * with the same RelFileNumber. If so, the file can be
* excluded. * excluded.
*/ */
memcpy(relNumber, de->d_name, relnumchars); snprintf(initForkFile, sizeof(initForkFile), "%s/%u_init",
relNumber[relnumchars] = '\0';
snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init",
path, relNumber); path, relNumber);
if (lstat(initForkFile, &statbuf) == 0) if (lstat(initForkFile, &statbuf) == 0)
@ -1419,8 +1417,8 @@ sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
if (!sizeonly) if (!sizeonly)
sent = sendFile(sink, pathbuf, pathbuf + basepathlen + 1, &statbuf, sent = sendFile(sink, pathbuf, pathbuf + basepathlen + 1, &statbuf,
true, isDbDir ? atooid(lastDir + 1) : InvalidOid, true, isDbDir ? atooid(lastDir + 1) : InvalidOid, spcoid,
manifest, spcoid); manifest);
if (sent || sizeonly) if (sent || sizeonly)
{ {
@ -1489,8 +1487,8 @@ is_checksummed_file(const char *fullpath, const char *filename)
*/ */
static bool static bool
sendFile(bbsink *sink, const char *readfilename, const char *tarfilename, sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
struct stat *statbuf, bool missing_ok, Oid dboid, struct stat *statbuf, bool missing_ok, Oid dboid, Oid spcoid,
backup_manifest_info *manifest, const char *spcoid) backup_manifest_info *manifest)
{ {
int fd; int fd;
BlockNumber blkno = 0; BlockNumber blkno = 0;

View File

@ -407,7 +407,7 @@ SendTablespaceList(List *tablespaces)
} }
else else
{ {
values[0] = ObjectIdGetDatum(strtoul(ti->oid, NULL, 10)); values[0] = ObjectIdGetDatum(ti->oid);
values[1] = CStringGetTextDatum(ti->path); values[1] = CStringGetTextDatum(ti->path);
} }
if (ti->size >= 0) if (ti->size >= 0)

View File

@ -2098,6 +2098,65 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
return first_block; return first_block;
} }
/*
 * BufferIsExclusiveLocked
 *
 *		Checks if buffer is exclusive-locked.
 *
 * Buffer must be pinned.
 *
 * Returns true if the calling backend holds the buffer's content lock in
 * LW_EXCLUSIVE mode.  Local buffers are private to the backend and no
 * content lock is ever taken on them, so they are always reported as
 * exclusive-locked; asking the lock manager about them would always
 * answer "not held" and make callers' assertions fail spuriously.
 */
bool
BufferIsExclusiveLocked(Buffer buffer)
{
	BufferDesc *bufHdr;

	Assert(BufferIsPinned(buffer));

	if (BufferIsLocal(buffer))
	{
		/* Content locks are not maintained for local buffers. */
		return true;
	}

	bufHdr = GetBufferDescriptor(buffer - 1);

	return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
								LW_EXCLUSIVE);
}
/*
 * BufferIsDirty
 *
 *		Checks if buffer is already dirty.
 *
 * Buffer must be pinned and, if it is a shared buffer, exclusive-locked.
 * (Without an exclusive lock, the result may be stale before it's
 * returned.)  Local buffers are private to the backend and carry no
 * content lock, so the lock requirement is not checked for them.
 *
 * Returns true if BM_DIRTY is set in the buffer header's state word.
 */
bool
BufferIsDirty(Buffer buffer)
{
	BufferDesc *bufHdr;

	Assert(BufferIsPinned(buffer));

	if (BufferIsLocal(buffer))
	{
		int			bufid = -buffer - 1;

		/* Content locks are not maintained for local buffers. */
		bufHdr = GetLocalBufferDescriptor(bufid);
	}
	else
	{
		bufHdr = GetBufferDescriptor(buffer - 1);
		Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
									LW_EXCLUSIVE));
	}

	/*
	 * Compare against zero explicitly so the result is a proper boolean even
	 * where "bool" is not a native _Bool and a high flag bit would otherwise
	 * be truncated away.
	 */
	return (pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY) != 0;
}
/* /*
* MarkBufferDirty * MarkBufferDirty
* *

View File

@ -31,7 +31,7 @@ static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
typedef struct typedef struct
{ {
Oid reloid; /* hash key */ RelFileNumber relnumber; /* hash key */
} unlogged_relation_entry; } unlogged_relation_entry;
/* /*
@ -195,12 +195,13 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
{ {
ForkNumber forkNum; ForkNumber forkNum;
int relnumchars; unsigned segno;
unlogged_relation_entry ent; unlogged_relation_entry ent;
/* Skip anything that doesn't look like a relation data file. */ /* Skip anything that doesn't look like a relation data file. */
if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars, if (!parse_filename_for_nontemp_relation(de->d_name,
&forkNum)) &ent.relnumber,
&forkNum, &segno))
continue; continue;
/* Also skip it unless this is the init fork. */ /* Also skip it unless this is the init fork. */
@ -208,10 +209,8 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
continue; continue;
/* /*
* Put the OID portion of the name into the hash table, if it * Put the RelFileNumber into the hash table, if it isn't already.
* isn't already.
*/ */
ent.reloid = atooid(de->d_name);
(void) hash_search(hash, &ent, HASH_ENTER, NULL); (void) hash_search(hash, &ent, HASH_ENTER, NULL);
} }
@ -235,12 +234,13 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
{ {
ForkNumber forkNum; ForkNumber forkNum;
int relnumchars; unsigned segno;
unlogged_relation_entry ent; unlogged_relation_entry ent;
/* Skip anything that doesn't look like a relation data file. */ /* Skip anything that doesn't look like a relation data file. */
if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars, if (!parse_filename_for_nontemp_relation(de->d_name,
&forkNum)) &ent.relnumber,
&forkNum, &segno))
continue; continue;
/* We never remove the init fork. */ /* We never remove the init fork. */
@ -251,7 +251,6 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
* See whether the OID portion of the name shows up in the hash * See whether the OID portion of the name shows up in the hash
* table. If so, nuke it! * table. If so, nuke it!
*/ */
ent.reloid = atooid(de->d_name);
if (hash_search(hash, &ent, HASH_FIND, NULL)) if (hash_search(hash, &ent, HASH_FIND, NULL))
{ {
snprintf(rm_path, sizeof(rm_path), "%s/%s", snprintf(rm_path, sizeof(rm_path), "%s/%s",
@ -285,14 +284,14 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
{ {
ForkNumber forkNum; ForkNumber forkNum;
int relnumchars; RelFileNumber relNumber;
char relnumbuf[OIDCHARS + 1]; unsigned segno;
char srcpath[MAXPGPATH * 2]; char srcpath[MAXPGPATH * 2];
char dstpath[MAXPGPATH]; char dstpath[MAXPGPATH];
/* Skip anything that doesn't look like a relation data file. */ /* Skip anything that doesn't look like a relation data file. */
if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars, if (!parse_filename_for_nontemp_relation(de->d_name, &relNumber,
&forkNum)) &forkNum, &segno))
continue; continue;
/* Also skip it unless this is the init fork. */ /* Also skip it unless this is the init fork. */
@ -304,11 +303,12 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
dbspacedirname, de->d_name); dbspacedirname, de->d_name);
/* Construct destination pathname. */ /* Construct destination pathname. */
memcpy(relnumbuf, de->d_name, relnumchars); if (segno == 0)
relnumbuf[relnumchars] = '\0'; snprintf(dstpath, sizeof(dstpath), "%s/%u",
snprintf(dstpath, sizeof(dstpath), "%s/%s%s", dbspacedirname, relNumber);
dbspacedirname, relnumbuf, de->d_name + relnumchars + 1 + else
strlen(forkNames[INIT_FORKNUM])); snprintf(dstpath, sizeof(dstpath), "%s/%u.%u",
dbspacedirname, relNumber, segno);
/* OK, we're ready to perform the actual copy. */ /* OK, we're ready to perform the actual copy. */
elog(DEBUG2, "copying %s to %s", srcpath, dstpath); elog(DEBUG2, "copying %s to %s", srcpath, dstpath);
@ -327,14 +327,14 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
dbspace_dir = AllocateDir(dbspacedirname); dbspace_dir = AllocateDir(dbspacedirname);
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL) while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
{ {
RelFileNumber relNumber;
ForkNumber forkNum; ForkNumber forkNum;
int relnumchars; unsigned segno;
char relnumbuf[OIDCHARS + 1];
char mainpath[MAXPGPATH]; char mainpath[MAXPGPATH];
/* Skip anything that doesn't look like a relation data file. */ /* Skip anything that doesn't look like a relation data file. */
if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars, if (!parse_filename_for_nontemp_relation(de->d_name, &relNumber,
&forkNum)) &forkNum, &segno))
continue; continue;
/* Also skip it unless this is the init fork. */ /* Also skip it unless this is the init fork. */
@ -342,11 +342,12 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
continue; continue;
/* Construct main fork pathname. */ /* Construct main fork pathname. */
memcpy(relnumbuf, de->d_name, relnumchars); if (segno == 0)
relnumbuf[relnumchars] = '\0'; snprintf(mainpath, sizeof(mainpath), "%s/%u",
snprintf(mainpath, sizeof(mainpath), "%s/%s%s", dbspacedirname, relNumber);
dbspacedirname, relnumbuf, de->d_name + relnumchars + 1 + else
strlen(forkNames[INIT_FORKNUM])); snprintf(mainpath, sizeof(mainpath), "%s/%u.%u",
dbspacedirname, relNumber, segno);
fsync_fname(mainpath, false); fsync_fname(mainpath, false);
} }
@ -371,52 +372,82 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
* This function returns true if the file appears to be in the correct format * This function returns true if the file appears to be in the correct format
* for a non-temporary relation and false otherwise. * for a non-temporary relation and false otherwise.
* *
* NB: If this function returns true, the caller is entitled to assume that * If it returns true, it sets *relnumber, *fork, and *segno to the values
* *relnumchars has been set to a value no more than OIDCHARS, and thus * extracted from the filename. If it returns false, these values are set to
* that a buffer of OIDCHARS+1 characters is sufficient to hold the * InvalidRelFileNumber, InvalidForkNumber, and 0, respectively.
* RelFileNumber portion of the filename. This is critical to protect against
* a possible buffer overrun.
*/ */
bool bool
parse_filename_for_nontemp_relation(const char *name, int *relnumchars, parse_filename_for_nontemp_relation(const char *name, RelFileNumber *relnumber,
ForkNumber *fork) ForkNumber *fork, unsigned *segno)
{ {
int pos; unsigned long n,
s;
ForkNumber f;
char *endp;
/* Look for a non-empty string of digits (that isn't too long). */ *relnumber = InvalidRelFileNumber;
for (pos = 0; isdigit((unsigned char) name[pos]); ++pos) *fork = InvalidForkNumber;
; *segno = 0;
if (pos == 0 || pos > OIDCHARS)
/*
* Relation filenames should begin with a digit that is not a zero. By
* rejecting cases involving leading zeroes, the caller can assume that
* there's only one possible string of characters that could have produced
* any given value for *relnumber.
*
* (To be clear, we don't expect files with names like 0017.3 to exist at
* all -- but if 0017.3 does exist, it's a non-relation file, not part of
* the main fork for relfilenode 17.)
*/
if (name[0] < '1' || name[0] > '9')
return false; return false;
*relnumchars = pos;
/*
* Parse the leading digit string. If the value is out of range, we
* conclude that this isn't a relation file at all.
*/
errno = 0;
n = strtoul(name, &endp, 10);
if (errno || name == endp || n <= 0 || n > PG_UINT32_MAX)
return false;
name = endp;
/* Check for a fork name. */ /* Check for a fork name. */
if (name[pos] != '_') if (*name != '_')
*fork = MAIN_FORKNUM; f = MAIN_FORKNUM;
else else
{ {
int forkchar; int forkchar;
forkchar = forkname_chars(&name[pos + 1], fork); forkchar = forkname_chars(name + 1, &f);
if (forkchar <= 0) if (forkchar <= 0)
return false; return false;
pos += forkchar + 1; name += forkchar + 1;
} }
/* Check for a segment number. */ /* Check for a segment number. */
if (name[pos] == '.') if (*name != '.')
s = 0;
else
{ {
int segchar; /* Reject leading zeroes, just like we do for RelFileNumber. */
if (name[0] < '1' || name[0] > '9')
for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar)
;
if (segchar <= 1)
return false; return false;
pos += segchar;
errno = 0;
s = strtoul(name + 1, &endp, 10);
if (errno || name + 1 == endp || s <= 0 || s > PG_UINT32_MAX)
return false;
name = endp;
} }
/* Now we should be at the end. */ /* Now we should be at the end. */
if (name[pos] != '\0') if (*name != '\0')
return false; return false;
/* Set out parameters and return. */
*relnumber = (RelFileNumber) n;
*fork = f;
*segno = (unsigned) s;
return true; return true;
} }

View File

@ -56,9 +56,9 @@ uint32 *my_wait_event_info = &local_my_wait_event_info;
* It is used to ensure that no duplicated entries are registered. * It is used to ensure that no duplicated entries are registered.
* *
* The size of the hash table is based on the assumption that * The size of the hash table is based on the assumption that
* WAIT_EVENT_EXTENSION_BASH_INIT_SIZE is enough for most cases, and it seems * WAIT_EVENT_EXTENSION_HASH_INIT_SIZE is enough for most cases, and it seems
* unlikely that the number of entries will reach * unlikely that the number of entries will reach
* WAIT_EVENT_EXTENSION_BASH_MAX_SIZE. * WAIT_EVENT_EXTENSION_HASH_MAX_SIZE.
*/ */
static HTAB *WaitEventExtensionHashById; /* find names from IDs */ static HTAB *WaitEventExtensionHashById; /* find names from IDs */
static HTAB *WaitEventExtensionHashByName; /* find IDs from names */ static HTAB *WaitEventExtensionHashByName; /* find IDs from names */

View File

@ -37,6 +37,7 @@
* will be skipped) */ * will be skipped) */
#define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image #define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image
* is taken */ * is taken */
#define REGBUF_NO_CHANGE 0x20 /* intentionally register clean buffer */
/* prototypes for public functions in xloginsert.c: */ /* prototypes for public functions in xloginsert.c: */
extern void XLogBeginInsert(void); extern void XLogBeginInsert(void);

View File

@ -39,7 +39,7 @@ extern void InitializeBackupManifest(backup_manifest_info *manifest,
backup_manifest_option want_manifest, backup_manifest_option want_manifest,
pg_checksum_type manifest_checksum_type); pg_checksum_type manifest_checksum_type);
extern void AddFileToBackupManifest(backup_manifest_info *manifest, extern void AddFileToBackupManifest(backup_manifest_info *manifest,
const char *spcoid, Oid spcoid,
const char *pathname, size_t size, const char *pathname, size_t size,
pg_time_t mtime, pg_time_t mtime,
pg_checksum_context *checksum_ctx); pg_checksum_context *checksum_ctx);

View File

@ -27,7 +27,7 @@
*/ */
typedef struct typedef struct
{ {
char *oid; /* tablespace's OID, as a decimal string */ Oid oid; /* tablespace's OID */
char *path; /* full path to tablespace's directory */ char *path; /* full path to tablespace's directory */
char *rpath; /* relative path if it's within PGDATA, else char *rpath; /* relative path if it's within PGDATA, else
* NULL */ * NULL */

View File

@ -179,6 +179,8 @@ extern Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator,
bool permanent); bool permanent);
extern void ReleaseBuffer(Buffer buffer); extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer); extern void UnlockReleaseBuffer(Buffer buffer);
extern bool BufferIsExclusiveLocked(Buffer buffer);
extern bool BufferIsDirty(Buffer buffer);
extern void MarkBufferDirty(Buffer buffer); extern void MarkBufferDirty(Buffer buffer);
extern void IncrBufferRefCount(Buffer buffer); extern void IncrBufferRefCount(Buffer buffer);
extern void CheckBufferIsPinnedOnce(Buffer buffer); extern void CheckBufferIsPinnedOnce(Buffer buffer);

View File

@ -20,8 +20,9 @@
extern void ResetUnloggedRelations(int op); extern void ResetUnloggedRelations(int op);
extern bool parse_filename_for_nontemp_relation(const char *name, extern bool parse_filename_for_nontemp_relation(const char *name,
int *relnumchars, RelFileNumber *relnumber,
ForkNumber *fork); ForkNumber *fork,
unsigned *segno);
#define UNLOGGED_RELATION_CLEANUP 0x0001 #define UNLOGGED_RELATION_CLEANUP 0x0001
#define UNLOGGED_RELATION_INIT 0x0002 #define UNLOGGED_RELATION_INIT 0x0002