Compare commits

...

5 Commits

Author SHA1 Message Date
Jeff Davis
00d7fb5e2e Assert that buffers are marked dirty before XLogRegisterBuffer().
Enforce the rule from transam/README in XLogRegisterBuffer(), and
update callers to follow the rule.

Hash indexes sometimes register clean pages as a part of the locking
protocol, so provide a REGBUF_NO_CHANGE flag to support that use.

Discussion: https://postgr.es/m/c84114f8-c7f1-5b57-f85a-3adc31e1a904@iki.fi
Reviewed-by: Heikki Linnakangas
2023-10-23 17:17:46 -07:00
Michael Paquier
befe9451fb doc: Improve example query related to pg_wait_events
Author: Pavel Luzanov
Discussion: https://postgr.es/m/4f79be75-7e30-4817-b0da-4a691ea5427f@postgrespro.ru
2023-10-24 08:40:43 +09:00
Michael Paquier
9972c7de1d Fix typos in wait_event.c
Noticed while working on a different patch.  Introduced in af720b4c50a1.
2023-10-24 08:05:29 +09:00
Robert Haas
5b36e8f078 Change struct tablespaceinfo's oid member from 'char *' to 'Oid'
This shouldn't change behavior except in the unusual case where
there are files in the tablespace directory that have entirely
numeric names but are nevertheless not possible names for a
tablespace directory, either because their names have leading zeroes
that shouldn't be there, or the value is actually zero, or because
the value is too large to represent as an OID.

In those cases, the directory would previously have made it into
the list of tablespaceinfo objects and no longer will. Thus, base
backups will now ignore such directories, instead of treating them
as legitimate tablespace directories. Similarly, if entries for
such tablespaces occur in a tablespace_map file, they will now
be rejected as erroneous, instead of being honored.

This is infrastructure for future work that wants to be able to
know the tablespace of each relation that is part of a backup
*as an OID*. By strengthening the up-front validation, we don't
have to worry about weird cases later, and can more easily avoid
repeated string->integer conversions.

Patch by me, reviewed by David Steele.

Discussion: http://postgr.es/m/CA+TgmoZNVeBzoqDL8xvr-nkaepq815jtDR4nJzPew7=3iEuM1g@mail.gmail.com
2023-10-23 15:17:26 -04:00
Robert Haas
5c47c6546c Refactor parse_filename_for_nontemp_relation to parse more.
Instead of returning the number of characters in the RelFileNumber,
return the RelFileNumber itself. Continue to return the fork number,
as before, and additionally return the segment number.

parse_filename_for_nontemp_relation now rejects a RelFileNumber or
segment number that begins with a leading zero. Before, we accepted
such cases as relation filenames, but if we continued to do so after
this change, the function might return the same values for two
different files (e.g. 1234.5 and 001234.5 or 1234.005) which could be
annoying for callers. Since we don't actually ever generate filenames
with leading zeroes in the names, any such files that we find must
have been created by something other than PostgreSQL, and it is
therefore reasonable to treat them as non-relation files.

Along the way, change unlogged_relation_entry to store a RelFileNumber
rather than an OID. This update should have been made in
851f4cc75cdd8c831f1baa9a7abf8c8248b65890, but it was overlooked.
It's trivial to make the update as part of this commit, perhaps more
trivial than it would have been without it, so do that.

Patch by me, reviewed by David Steele.

Discussion: http://postgr.es/m/CA+TgmoZNVeBzoqDL8xvr-nkaepq815jtDR4nJzPew7=3iEuM1g@mail.gmail.com
2023-10-23 15:08:53 -04:00
21 changed files with 264 additions and 117 deletions

View File

@ -1119,7 +1119,7 @@ SELECT a.pid, a.wait_event, w.description
FROM pg_stat_activity a JOIN
pg_wait_events w ON (a.wait_event_type = w.type AND
a.wait_event = w.name)
WHERE wait_event is NOT NULL and a.state = 'active';
WHERE a.wait_event is NOT NULL and a.state = 'active';
-[ RECORD 1 ]------------------------------------------------------&zwsp;------------
pid | 686674
wait_event | WALInitSync

View File

@ -387,24 +387,22 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
START_CRIT_SECTION();
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogBeginInsert();
XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
if (BufferIsValid(childbuf))
XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
}
/* Perform the page update, and register any extra WAL data */
/*
* Perform the page update, dirty and register stack->buffer, and
* register any extra WAL data.
*/
btree->execPlaceToPage(btree, stack->buffer, stack,
insertdata, updateblkno, ptp_workspace);
MarkBufferDirty(stack->buffer);
/* An insert to an internal page finishes the split of the child. */
if (BufferIsValid(childbuf))
{
GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
MarkBufferDirty(childbuf);
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
}
if (RelationNeedsWAL(btree->index) && !btree->isBuild)

View File

@ -721,9 +721,12 @@ dataExecPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
/* Apply changes to page */
dataPlaceToPageLeafRecompress(buf, leaf);
MarkBufferDirty(buf);
/* If needed, register WAL data built by computeLeafRecompressWALData */
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
XLogRegisterBufData(0, leaf->walinfo, leaf->walinfolen);
}
}
@ -1155,6 +1158,8 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
pitem = (PostingItem *) insertdata;
GinDataPageAddPostingItem(page, pitem, off);
MarkBufferDirty(buf);
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
@ -1167,6 +1172,7 @@ dataExecPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
data.offset = off;
data.newitem = *pitem;
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
XLogRegisterBufData(0, (char *) &data,
sizeof(ginxlogInsertDataInternal));
}

View File

@ -571,6 +571,8 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
elog(ERROR, "failed to add item to index page in \"%s\"",
RelationGetRelationName(btree->index));
MarkBufferDirty(buf);
if (RelationNeedsWAL(btree->index) && !btree->isBuild)
{
/*
@ -583,6 +585,7 @@ entryExecPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
data.isDelete = insertData->isDelete;
data.offset = off;
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
XLogRegisterBufData(0, (char *) &data,
offsetof(ginxlogInsertEntry, tuple));
XLogRegisterBufData(0, (char *) insertData->entry,

View File

@ -397,6 +397,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
}
Assert((ptr - collectordata) <= collector->sumsize);
MarkBufferDirty(buffer);
if (needWal)
{
XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
@ -404,8 +407,6 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
}
metadata->tailFreeSize = PageGetExactFreeSpace(page);
MarkBufferDirty(buffer);
}
/*

View File

@ -824,11 +824,16 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,
XLogRegisterData((char *) &xlrec, SizeOfHashDelete);
/*
* bucket buffer needs to be registered to ensure that we can
* acquire a cleanup lock on it during replay.
* bucket buffer was not changed, but still needs to be
* registered to ensure that we can acquire a cleanup lock on
* it during replay.
*/
if (!xlrec.is_primary_bucket_page)
XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
{
uint8 flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
XLogRegisterBuffer(0, bucket_buf, flags);
}
XLogRegisterBuffer(1, buf, REGBUF_STANDARD);
XLogRegisterBufData(1, (char *) deletable,

View File

@ -658,11 +658,15 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf,
XLogRegisterData((char *) &xlrec, SizeOfHashSqueezePage);
/*
* bucket buffer needs to be registered to ensure that we can acquire
* a cleanup lock on it during replay.
* bucket buffer was not changed, but still needs to be registered to
* ensure that we can acquire a cleanup lock on it during replay.
*/
if (!xlrec.is_prim_bucket_same_wrt)
XLogRegisterBuffer(0, bucketbuf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
{
uint8 flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
XLogRegisterBuffer(0, bucketbuf, flags);
}
XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
if (xlrec.ntups > 0)
@ -960,11 +964,16 @@ readpage:
XLogRegisterData((char *) &xlrec, SizeOfHashMovePageContents);
/*
* bucket buffer needs to be registered to ensure that
* we can acquire a cleanup lock on it during replay.
* bucket buffer was not changed, but still needs to
* be registered to ensure that we can acquire a
* cleanup lock on it during replay.
*/
if (!xlrec.is_prim_bucket_same_wrt)
XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE);
{
int flags = REGBUF_STANDARD | REGBUF_NO_IMAGE | REGBUF_NO_CHANGE;
XLogRegisterBuffer(0, bucket_buf, flags);
}
XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD);
XLogRegisterBufData(1, (char *) itup_offsets,

View File

@ -8579,9 +8579,22 @@ do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
char *relpath = NULL;
char *s;
PGFileType de_type;
char *badp;
Oid tsoid;
/* Skip anything that doesn't look like a tablespace */
if (strspn(de->d_name, "0123456789") != strlen(de->d_name))
/*
* Try to parse the directory name as an unsigned integer.
*
* Tablespace directories should be positive integers that can be
* represented in 32 bits, with no leading zeroes or trailing
* garbage. If we come across a name that doesn't meet those
* criteria, skip it.
*/
if (de->d_name[0] < '1' || de->d_name[1] > '9')
continue;
errno = 0;
tsoid = strtoul(de->d_name, &badp, 10);
if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
continue;
snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);
@ -8656,7 +8669,7 @@ do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
}
ti = palloc(sizeof(tablespaceinfo));
ti->oid = pstrdup(de->d_name);
ti->oid = tsoid;
ti->path = pstrdup(linkpath);
ti->rpath = relpath;
ti->size = -1;

View File

@ -248,6 +248,20 @@ XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
Assert(!((flags & REGBUF_FORCE_IMAGE) && (flags & (REGBUF_NO_IMAGE))));
Assert(begininsert_called);
/*
* Ordinarily, buffer should be exclusive-locked and marked dirty before
* we get here, otherwise we could end up violating one of the rules in
* access/transam/README.
*
* Some callers intentionally register a clean page and never update that
* page's LSN; in that case they can pass the flag REGBUF_NO_CHANGE to
* bypass these checks.
*/
#ifdef USE_ASSERT_CHECKING
if (!(flags & REGBUF_NO_CHANGE))
Assert(BufferIsExclusiveLocked(buffer) && BufferIsDirty(buffer));
#endif
if (block_id >= max_registered_block_id)
{
if (block_id >= max_registered_buffers)
@ -1313,8 +1327,8 @@ log_newpage_range(Relation rel, ForkNumber forknum,
START_CRIT_SECTION();
for (i = 0; i < nbufs; i++)
{
XLogRegisterBuffer(i, bufpack[i], flags);
MarkBufferDirty(bufpack[i]);
XLogRegisterBuffer(i, bufpack[i], flags);
}
recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI);

View File

@ -678,7 +678,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
tablespaceinfo *ti = lfirst(lc);
char *linkloc;
linkloc = psprintf("pg_tblspc/%s", ti->oid);
linkloc = psprintf("pg_tblspc/%u", ti->oid);
/*
* Remove the existing symlink if any and Create the symlink
@ -692,7 +692,6 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
errmsg("could not create symbolic link \"%s\": %m",
linkloc)));
pfree(ti->oid);
pfree(ti->path);
pfree(ti);
}
@ -1341,6 +1340,8 @@ read_tablespace_map(List **tablespaces)
{
if (!was_backslash && (ch == '\n' || ch == '\r'))
{
char *endp;
if (i == 0)
continue; /* \r immediately followed by \n */
@ -1360,7 +1361,12 @@ read_tablespace_map(List **tablespaces)
str[n++] = '\0';
ti = palloc0(sizeof(tablespaceinfo));
ti->oid = pstrdup(str);
errno = 0;
ti->oid = strtoul(str, &endp, 10);
if (*endp != '\0' || errno == EINVAL || errno == ERANGE)
ereport(FATAL,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
ti->path = pstrdup(str + n);
*tablespaces = lappend(*tablespaces, ti);

View File

@ -97,7 +97,7 @@ FreeBackupManifest(backup_manifest_info *manifest)
* Add an entry to the backup manifest for a file.
*/
void
AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
AddFileToBackupManifest(backup_manifest_info *manifest, Oid spcoid,
const char *pathname, size_t size, pg_time_t mtime,
pg_checksum_context *checksum_ctx)
{
@ -114,9 +114,9 @@ AddFileToBackupManifest(backup_manifest_info *manifest, const char *spcoid,
* pathname relative to the data directory (ignoring the intermediate
* symlink traversal).
*/
if (spcoid != NULL)
if (OidIsValid(spcoid))
{
snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid,
snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%u/%s", spcoid,
pathname);
pathname = pathbuf;
}

View File

@ -75,14 +75,15 @@ typedef struct
pg_checksum_type manifest_checksum_type;
} basebackup_options;
static int64 sendTablespace(bbsink *sink, char *path, char *spcoid, bool sizeonly,
static int64 sendTablespace(bbsink *sink, char *path, Oid spcoid, bool sizeonly,
struct backup_manifest_info *manifest);
static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
List *tablespaces, bool sendtblspclinks,
backup_manifest_info *manifest, const char *spcoid);
backup_manifest_info *manifest, Oid spcoid);
static bool sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
struct stat *statbuf, bool missing_ok, Oid dboid,
backup_manifest_info *manifest, const char *spcoid);
struct stat *statbuf, bool missing_ok,
Oid dboid, Oid spcoid,
backup_manifest_info *manifest);
static off_t read_file_data_into_buffer(bbsink *sink,
const char *readfilename, int fd,
off_t offset, size_t length,
@ -305,7 +306,7 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
if (tmp->path == NULL)
tmp->size = sendDir(sink, ".", 1, true, state.tablespaces,
true, NULL, NULL);
true, NULL, InvalidOid);
else
tmp->size = sendTablespace(sink, tmp->path, tmp->oid, true,
NULL);
@ -346,7 +347,7 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
/* Then the bulk of the files... */
sendDir(sink, ".", 1, false, state.tablespaces,
sendtblspclinks, &manifest, NULL);
sendtblspclinks, &manifest, InvalidOid);
/* ... and pg_control after everything else. */
if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
@ -355,11 +356,11 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
errmsg("could not stat file \"%s\": %m",
XLOG_CONTROL_FILE)));
sendFile(sink, XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf,
false, InvalidOid, &manifest, NULL);
false, InvalidOid, InvalidOid, &manifest);
}
else
{
char *archive_name = psprintf("%s.tar", ti->oid);
char *archive_name = psprintf("%u.tar", ti->oid);
bbsink_begin_archive(sink, archive_name);
@ -623,8 +624,8 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
(errcode_for_file_access(),
errmsg("could not stat file \"%s\": %m", pathbuf)));
sendFile(sink, pathbuf, pathbuf, &statbuf, false, InvalidOid,
&manifest, NULL);
sendFile(sink, pathbuf, pathbuf, &statbuf, false,
InvalidOid, InvalidOid, &manifest);
/* unconditionally mark file as archived */
StatusFilePath(pathbuf, fname, ".done");
@ -1087,7 +1088,7 @@ sendFileWithContent(bbsink *sink, const char *filename, const char *content,
_tarWritePadding(sink, len);
AddFileToBackupManifest(manifest, NULL, filename, len,
AddFileToBackupManifest(manifest, InvalidOid, filename, len,
(pg_time_t) statbuf.st_mtime, &checksum_ctx);
}
@ -1099,7 +1100,7 @@ sendFileWithContent(bbsink *sink, const char *filename, const char *content,
* Only used to send auxiliary tablespaces, not PGDATA.
*/
static int64
sendTablespace(bbsink *sink, char *path, char *spcoid, bool sizeonly,
sendTablespace(bbsink *sink, char *path, Oid spcoid, bool sizeonly,
backup_manifest_info *manifest)
{
int64 size;
@ -1154,7 +1155,7 @@ sendTablespace(bbsink *sink, char *path, char *spcoid, bool sizeonly,
static int64
sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
List *tablespaces, bool sendtblspclinks, backup_manifest_info *manifest,
const char *spcoid)
Oid spcoid)
{
DIR *dir;
struct dirent *de;
@ -1197,9 +1198,9 @@ sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
{
int excludeIdx;
bool excludeFound;
ForkNumber relForkNum; /* Type of fork if file is a relation */
int relnumchars; /* Chars in filename that are the
* relnumber */
RelFileNumber relNumber;
ForkNumber relForkNum;
unsigned segno;
/* Skip special stuff */
if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
@ -1249,23 +1250,20 @@ sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
/* Exclude all forks for unlogged tables except the init fork */
if (isDbDir &&
parse_filename_for_nontemp_relation(de->d_name, &relnumchars,
&relForkNum))
parse_filename_for_nontemp_relation(de->d_name, &relNumber,
&relForkNum, &segno))
{
/* Never exclude init forks */
if (relForkNum != INIT_FORKNUM)
{
char initForkFile[MAXPGPATH];
char relNumber[OIDCHARS + 1];
/*
* If any other type of fork, check if there is an init fork
* with the same RelFileNumber. If so, the file can be
* excluded.
*/
memcpy(relNumber, de->d_name, relnumchars);
relNumber[relnumchars] = '\0';
snprintf(initForkFile, sizeof(initForkFile), "%s/%s_init",
snprintf(initForkFile, sizeof(initForkFile), "%s/%u_init",
path, relNumber);
if (lstat(initForkFile, &statbuf) == 0)
@ -1419,8 +1417,8 @@ sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
if (!sizeonly)
sent = sendFile(sink, pathbuf, pathbuf + basepathlen + 1, &statbuf,
true, isDbDir ? atooid(lastDir + 1) : InvalidOid,
manifest, spcoid);
true, isDbDir ? atooid(lastDir + 1) : InvalidOid, spcoid,
manifest);
if (sent || sizeonly)
{
@ -1489,8 +1487,8 @@ is_checksummed_file(const char *fullpath, const char *filename)
*/
static bool
sendFile(bbsink *sink, const char *readfilename, const char *tarfilename,
struct stat *statbuf, bool missing_ok, Oid dboid,
backup_manifest_info *manifest, const char *spcoid)
struct stat *statbuf, bool missing_ok, Oid dboid, Oid spcoid,
backup_manifest_info *manifest)
{
int fd;
BlockNumber blkno = 0;

View File

@ -407,7 +407,7 @@ SendTablespaceList(List *tablespaces)
}
else
{
values[0] = ObjectIdGetDatum(strtoul(ti->oid, NULL, 10));
values[0] = ObjectIdGetDatum(ti->oid);
values[1] = CStringGetTextDatum(ti->path);
}
if (ti->size >= 0)

View File

@ -2098,6 +2098,65 @@ ExtendBufferedRelShared(BufferManagerRelation bmr,
return first_block;
}
/*
 * BufferIsExclusiveLocked
 *
 * Checks if buffer is exclusive-locked by the current backend.
 *
 * Buffer must be pinned.
 *
 * Returns true if this backend holds the buffer's content lock in
 * LW_EXCLUSIVE mode.  Local (backend-private) buffers are never
 * content-locked, so for them there is no lock state to inspect; they are
 * reported as exclusively locked because no other backend can access them.
 * Without this special case, callers that assert on the result (such as
 * XLogRegisterBuffer) would always fail for a local buffer.
 */
bool
BufferIsExclusiveLocked(Buffer buffer)
{
    BufferDesc *bufHdr;

    Assert(BufferIsPinned(buffer));

    if (BufferIsLocal(buffer))
    {
        /* Content locks are not maintained for local buffers. */
        return true;
    }

    bufHdr = GetBufferDescriptor(buffer - 1);
    return LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
                                LW_EXCLUSIVE);
}
/*
 * BufferIsDirty
 *
 * Checks if buffer is already dirty.
 *
 * Buffer must be pinned and, for shared buffers, exclusive-locked.  (Without
 * an exclusive lock, the result may be stale before it's returned.)  Local
 * buffers are not content-locked, so the lock requirement is only asserted
 * for shared buffers.
 *
 * Returns true if the BM_DIRTY flag is set in the buffer header's state.
 */
bool
BufferIsDirty(Buffer buffer)
{
    BufferDesc *bufHdr;

    Assert(BufferIsPinned(buffer));

    if (BufferIsLocal(buffer))
    {
        int         bufid = -buffer - 1;

        /* Content locks are not maintained for local buffers. */
        bufHdr = GetLocalBufferDescriptor(bufid);
    }
    else
    {
        bufHdr = GetBufferDescriptor(buffer - 1);
        Assert(LWLockHeldByMeInMode(BufferDescriptorGetContentLock(bufHdr),
                                    LW_EXCLUSIVE));
    }

    /*
     * Compare explicitly against zero: BM_DIRTY is a high-order bit, and
     * converting the masked value straight to a non-_Bool "bool" type would
     * truncate it to 0.
     */
    return (pg_atomic_read_u32(&bufHdr->state) & BM_DIRTY) != 0;
}
/*
* MarkBufferDirty
*

View File

@ -31,7 +31,7 @@ static void ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname,
typedef struct
{
Oid reloid; /* hash key */
RelFileNumber relnumber; /* hash key */
} unlogged_relation_entry;
/*
@ -195,12 +195,13 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
{
ForkNumber forkNum;
int relnumchars;
unsigned segno;
unlogged_relation_entry ent;
/* Skip anything that doesn't look like a relation data file. */
if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars,
&forkNum))
if (!parse_filename_for_nontemp_relation(de->d_name,
&ent.relnumber,
&forkNum, &segno))
continue;
/* Also skip it unless this is the init fork. */
@ -208,10 +209,8 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
continue;
/*
* Put the OID portion of the name into the hash table, if it
* isn't already.
* Put the RelFileNumber into the hash table, if it isn't already.
*/
ent.reloid = atooid(de->d_name);
(void) hash_search(hash, &ent, HASH_ENTER, NULL);
}
@ -235,12 +234,13 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
{
ForkNumber forkNum;
int relnumchars;
unsigned segno;
unlogged_relation_entry ent;
/* Skip anything that doesn't look like a relation data file. */
if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars,
&forkNum))
if (!parse_filename_for_nontemp_relation(de->d_name,
&ent.relnumber,
&forkNum, &segno))
continue;
/* We never remove the init fork. */
@ -251,7 +251,6 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
* See whether the RelFileNumber parsed from the name shows up in
* the hash table. If so, nuke it!
*/
ent.reloid = atooid(de->d_name);
if (hash_search(hash, &ent, HASH_FIND, NULL))
{
snprintf(rm_path, sizeof(rm_path), "%s/%s",
@ -285,14 +284,14 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
{
ForkNumber forkNum;
int relnumchars;
char relnumbuf[OIDCHARS + 1];
RelFileNumber relNumber;
unsigned segno;
char srcpath[MAXPGPATH * 2];
char dstpath[MAXPGPATH];
/* Skip anything that doesn't look like a relation data file. */
if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars,
&forkNum))
if (!parse_filename_for_nontemp_relation(de->d_name, &relNumber,
&forkNum, &segno))
continue;
/* Also skip it unless this is the init fork. */
@ -304,11 +303,12 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
dbspacedirname, de->d_name);
/* Construct destination pathname. */
memcpy(relnumbuf, de->d_name, relnumchars);
relnumbuf[relnumchars] = '\0';
snprintf(dstpath, sizeof(dstpath), "%s/%s%s",
dbspacedirname, relnumbuf, de->d_name + relnumchars + 1 +
strlen(forkNames[INIT_FORKNUM]));
if (segno == 0)
snprintf(dstpath, sizeof(dstpath), "%s/%u",
dbspacedirname, relNumber);
else
snprintf(dstpath, sizeof(dstpath), "%s/%u.%u",
dbspacedirname, relNumber, segno);
/* OK, we're ready to perform the actual copy. */
elog(DEBUG2, "copying %s to %s", srcpath, dstpath);
@ -327,14 +327,14 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
dbspace_dir = AllocateDir(dbspacedirname);
while ((de = ReadDir(dbspace_dir, dbspacedirname)) != NULL)
{
RelFileNumber relNumber;
ForkNumber forkNum;
int relnumchars;
char relnumbuf[OIDCHARS + 1];
unsigned segno;
char mainpath[MAXPGPATH];
/* Skip anything that doesn't look like a relation data file. */
if (!parse_filename_for_nontemp_relation(de->d_name, &relnumchars,
&forkNum))
if (!parse_filename_for_nontemp_relation(de->d_name, &relNumber,
&forkNum, &segno))
continue;
/* Also skip it unless this is the init fork. */
@ -342,11 +342,12 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
continue;
/* Construct main fork pathname. */
memcpy(relnumbuf, de->d_name, relnumchars);
relnumbuf[relnumchars] = '\0';
snprintf(mainpath, sizeof(mainpath), "%s/%s%s",
dbspacedirname, relnumbuf, de->d_name + relnumchars + 1 +
strlen(forkNames[INIT_FORKNUM]));
if (segno == 0)
snprintf(mainpath, sizeof(mainpath), "%s/%u",
dbspacedirname, relNumber);
else
snprintf(mainpath, sizeof(mainpath), "%s/%u.%u",
dbspacedirname, relNumber, segno);
fsync_fname(mainpath, false);
}
@ -371,52 +372,82 @@ ResetUnloggedRelationsInDbspaceDir(const char *dbspacedirname, int op)
* This function returns true if the file appears to be in the correct format
* for a non-temporary relation and false otherwise.
*
* NB: If this function returns true, the caller is entitled to assume that
* *relnumchars has been set to a value no more than OIDCHARS, and thus
* that a buffer of OIDCHARS+1 characters is sufficient to hold the
* RelFileNumber portion of the filename. This is critical to protect against
* a possible buffer overrun.
* If it returns true, it sets *relnumber, *fork, and *segno to the values
* extracted from the filename. If it returns false, these values are set to
* InvalidRelFileNumber, InvalidForkNumber, and 0, respectively.
*/
bool
parse_filename_for_nontemp_relation(const char *name, int *relnumchars,
ForkNumber *fork)
parse_filename_for_nontemp_relation(const char *name, RelFileNumber *relnumber,
ForkNumber *fork, unsigned *segno)
{
int pos;
unsigned long n,
s;
ForkNumber f;
char *endp;
/* Look for a non-empty string of digits (that isn't too long). */
for (pos = 0; isdigit((unsigned char) name[pos]); ++pos)
;
if (pos == 0 || pos > OIDCHARS)
*relnumber = InvalidRelFileNumber;
*fork = InvalidForkNumber;
*segno = 0;
/*
* Relation filenames should begin with a digit that is not a zero. By
* rejecting cases involving leading zeroes, the caller can assume that
* there's only one possible string of characters that could have produced
* any given value for *relnumber.
*
* (To be clear, we don't expect files with names like 0017.3 to exist at
* all -- but if 0017.3 does exist, it's a non-relation file, not part of
* the main fork for relfilenode 17.)
*/
if (name[0] < '1' || name[0] > '9')
return false;
*relnumchars = pos;
/*
* Parse the leading digit string. If the value is out of range, we
* conclude that this isn't a relation file at all.
*/
errno = 0;
n = strtoul(name, &endp, 10);
if (errno || name == endp || n <= 0 || n > PG_UINT32_MAX)
return false;
name = endp;
/* Check for a fork name. */
if (name[pos] != '_')
*fork = MAIN_FORKNUM;
if (*name != '_')
f = MAIN_FORKNUM;
else
{
int forkchar;
forkchar = forkname_chars(&name[pos + 1], fork);
forkchar = forkname_chars(name + 1, &f);
if (forkchar <= 0)
return false;
pos += forkchar + 1;
name += forkchar + 1;
}
/* Check for a segment number. */
if (name[pos] == '.')
if (*name != '.')
s = 0;
else
{
int segchar;
for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar)
;
if (segchar <= 1)
/* Reject leading zeroes, just like we do for RelFileNumber. */
if (name[0] < '1' || name[0] > '9')
return false;
pos += segchar;
errno = 0;
s = strtoul(name + 1, &endp, 10);
if (errno || name + 1 == endp || s <= 0 || s > PG_UINT32_MAX)
return false;
name = endp;
}
/* Now we should be at the end. */
if (name[pos] != '\0')
if (*name != '\0')
return false;
/* Set out parameters and return. */
*relnumber = (RelFileNumber) n;
*fork = f;
*segno = (unsigned) s;
return true;
}

View File

@ -56,9 +56,9 @@ uint32 *my_wait_event_info = &local_my_wait_event_info;
* It is used to ensure that no duplicated entries are registered.
*
* The size of the hash table is based on the assumption that
* WAIT_EVENT_EXTENSION_BASH_INIT_SIZE is enough for most cases, and it seems
* WAIT_EVENT_EXTENSION_HASH_INIT_SIZE is enough for most cases, and it seems
* unlikely that the number of entries will reach
* WAIT_EVENT_EXTENSION_BASH_MAX_SIZE.
* WAIT_EVENT_EXTENSION_HASH_MAX_SIZE.
*/
static HTAB *WaitEventExtensionHashById; /* find names from IDs */
static HTAB *WaitEventExtensionHashByName; /* find IDs from names */

View File

@ -37,6 +37,7 @@
* will be skipped) */
#define REGBUF_KEEP_DATA 0x10 /* include data even if a full-page image
* is taken */
#define REGBUF_NO_CHANGE 0x20 /* intentionally register clean buffer */
/* prototypes for public functions in xloginsert.c: */
extern void XLogBeginInsert(void);

View File

@ -39,7 +39,7 @@ extern void InitializeBackupManifest(backup_manifest_info *manifest,
backup_manifest_option want_manifest,
pg_checksum_type manifest_checksum_type);
extern void AddFileToBackupManifest(backup_manifest_info *manifest,
const char *spcoid,
Oid spcoid,
const char *pathname, size_t size,
pg_time_t mtime,
pg_checksum_context *checksum_ctx);

View File

@ -27,7 +27,7 @@
*/
typedef struct
{
char *oid; /* tablespace's OID, as a decimal string */
Oid oid; /* tablespace's OID */
char *path; /* full path to tablespace's directory */
char *rpath; /* relative path if it's within PGDATA, else
* NULL */

View File

@ -179,6 +179,8 @@ extern Buffer ReadBufferWithoutRelcache(RelFileLocator rlocator,
bool permanent);
extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer);
extern bool BufferIsExclusiveLocked(Buffer buffer);
extern bool BufferIsDirty(Buffer buffer);
extern void MarkBufferDirty(Buffer buffer);
extern void IncrBufferRefCount(Buffer buffer);
extern void CheckBufferIsPinnedOnce(Buffer buffer);

View File

@ -20,8 +20,9 @@
extern void ResetUnloggedRelations(int op);
extern bool parse_filename_for_nontemp_relation(const char *name,
int *relnumchars,
ForkNumber *fork);
RelFileNumber *relnumber,
ForkNumber *fork,
unsigned *segno);
#define UNLOGGED_RELATION_CLEANUP 0x0001
#define UNLOGGED_RELATION_INIT 0x0002