Compare commits

...

2 Commits

Author SHA1 Message Date
Noah Misch
bf1c21c4fa Dissociate btequalimage() from interval_ops, ending its deduplication.
Under interval_ops, some equal values are distinguishable.  One such
pair is '24:00:00' and '1 day'.  With that being so, btequalimage()
breaches the documented contract for the "equalimage" btree support
function.  This can cause incorrect results from index-only scans.
Users should REINDEX any btree indexes having interval-type columns.
After updating, pg_amcheck will report an error for almost all such
indexes.  This fix makes interval_ops simply omit the support function,
like numeric_ops does.  Back-pack to v13, where btequalimage() first
appeared.  In back branches, for the benefit of old catalog content,
btequalimage() code will return false for type "interval".  Going
forward, back-branch initdb will include the catalog change.

Reviewed by Peter Geoghegan.

Discussion: https://postgr.es/m/20231011013317.22.nmisch@google.com
2023-10-14 16:33:54 -07:00
Noah Misch
06ff064842 Don't spuriously report FD_SETSIZE exhaustion on Windows.
Starting on 2023-08-03, this intermittently terminated a "pgbench -C"
test in CI.  It could affect a high-client-count "pgbench" without "-C".
While parallel reindexdb and vacuumdb reach the same problematic check,
sufficient client count and/or connection turnover is less plausible for
them.  Given the lack of examples from the buildfarm or from manual
builds, reproducing this must entail rare operating system
configurations.  Also correct the associated error message, which was
wrong for non-Windows.  Back-patch to v12, where the pgbench check first
appeared.  While v11 vacuumdb has the problematic check, reaching it
with typical vacuumdb usage is implausible.

Reviewed by Thomas Munro.

Discussion: https://postgr.es/m/CA+hUKG+JwvTNdcyJTriy9BbtzF1veSRQ=9M_ZKFn9_LqE7Kp7Q@mail.gmail.com
2023-10-14 15:54:49 -07:00
7 changed files with 70 additions and 20 deletions

View File

@ -31,6 +31,7 @@
#include "access/xact.h" #include "access/xact.h"
#include "catalog/index.h" #include "catalog/index.h"
#include "catalog/pg_am.h" #include "catalog/pg_am.h"
#include "catalog/pg_opfamily_d.h"
#include "commands/tablecmds.h" #include "commands/tablecmds.h"
#include "common/pg_prng.h" #include "common/pg_prng.h"
#include "lib/bloomfilter.h" #include "lib/bloomfilter.h"
@ -338,10 +339,20 @@ bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed,
errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version", errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version",
RelationGetRelationName(indrel)))); RelationGetRelationName(indrel))));
if (allequalimage && !_bt_allequalimage(indrel, false)) if (allequalimage && !_bt_allequalimage(indrel, false))
{
bool has_interval_ops = false;
for (int i = 0; i < IndexRelationGetNumberOfKeyAttributes(indrel); i++)
if (indrel->rd_opfamily[i] == INTERVAL_BTREE_FAM_OID)
has_interval_ops = true;
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED), (errcode(ERRCODE_INDEX_CORRUPTED),
errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe", errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe",
RelationGetRelationName(indrel)))); RelationGetRelationName(indrel)),
has_interval_ops
? errhint("This is known of \"interval\" indexes last built on a version predating 2023-11.")
: 0));
}
/* Check index, possibly against table it is an index on */ /* Check index, possibly against table it is an index on */
bt_check_every_level(indrel, heaprel, heapkeyspace, parentcheck, bt_check_every_level(indrel, heaprel, heapkeyspace, parentcheck,

View File

@ -43,6 +43,7 @@
#include "postgres.h" #include "postgres.h"
#include "access/detoast.h" #include "access/detoast.h"
#include "catalog/pg_type_d.h"
#include "common/hashfn.h" #include "common/hashfn.h"
#include "fmgr.h" #include "fmgr.h"
#include "utils/builtins.h" #include "utils/builtins.h"
@ -385,20 +386,17 @@ datum_image_hash(Datum value, bool typByVal, int typLen)
* datum_image_eq() in all cases can use this as their "equalimage" support * datum_image_eq() in all cases can use this as their "equalimage" support
* function. * function.
* *
* Currently, we unconditionally assume that any B-Tree operator class that * Earlier minor releases erroneously associated this function with
* registers btequalimage as its support function 4 must be able to safely use * interval_ops. Detect that case to rescind deduplication support, without
* optimizations like deduplication (i.e. we return true unconditionally). If * requiring initdb.
* it ever proved necessary to rescind support for an operator class, we could
* do that in a targeted fashion by doing something with the opcintype
* argument.
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
Datum Datum
btequalimage(PG_FUNCTION_ARGS) btequalimage(PG_FUNCTION_ARGS)
{ {
/* Oid opcintype = PG_GETARG_OID(0); */ Oid opcintype = PG_GETARG_OID(0);
PG_RETURN_BOOL(true); PG_RETURN_BOOL(opcintype != INTERVALOID);
} }
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------

View File

@ -7782,14 +7782,23 @@ clear_socket_set(socket_set *sa)
static void static void
add_socket_to_set(socket_set *sa, int fd, int idx) add_socket_to_set(socket_set *sa, int fd, int idx)
{ {
/* See connect_slot() for background on this code. */
#ifdef WIN32
if (sa->fds.fd_count + 1 >= FD_SETSIZE)
{
pg_log_error("too many concurrent database clients for this platform: %d",
sa->fds.fd_count + 1);
exit(1);
}
#else
if (fd < 0 || fd >= FD_SETSIZE) if (fd < 0 || fd >= FD_SETSIZE)
{ {
/* pg_log_error("socket file descriptor out of range for select(): %d",
* Doing a hard exit here is a bit grotty, but it doesn't seem worth fd);
* complicating the API to make it less grotty. pg_log_error_hint("Try fewer concurrent database clients.");
*/ exit(1);
pg_fatal("too many client connections for select()");
} }
#endif
FD_SET(fd, &sa->fds); FD_SET(fd, &sa->fds);
if (fd > sa->maxfd) if (fd > sa->maxfd)
sa->maxfd = fd; sa->maxfd = fd;

View File

@ -295,8 +295,41 @@ connect_slot(ParallelSlotArray *sa, int slotno, const char *dbname)
slot->connection = connectDatabase(sa->cparams, sa->progname, sa->echo, false, true); slot->connection = connectDatabase(sa->cparams, sa->progname, sa->echo, false, true);
sa->cparams->override_dbname = old_override; sa->cparams->override_dbname = old_override;
if (PQsocket(slot->connection) >= FD_SETSIZE) /*
pg_fatal("too many jobs for this platform"); * POSIX defines FD_SETSIZE as the highest file descriptor acceptable to
* FD_SET() and allied macros. Windows defines it as a ceiling on the
* count of file descriptors in the set, not a ceiling on the value of
* each file descriptor; see
* https://learn.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-select
* and
* https://learn.microsoft.com/en-us/windows/win32/api/winsock/ns-winsock-fd_set.
* We can't ignore that, because Windows starts file descriptors at a
* higher value, delays reuse, and skips values. With less than ten
* concurrent file descriptors, opened and closed rapidly, one can reach
* file descriptor 1024.
*
* Doing a hard exit here is a bit grotty, but it doesn't seem worth
* complicating the API to make it less grotty.
*/
#ifdef WIN32
if (slotno >= FD_SETSIZE)
{
pg_log_error("too many jobs for this platform: %d", slotno);
exit(1);
}
#else
{
int fd = PQsocket(slot->connection);
if (fd >= FD_SETSIZE)
{
pg_log_error("socket file descriptor out of range for select(): %d",
fd);
pg_log_error_hint("Try fewer jobs.");
exit(1);
}
}
#endif
/* Setup the connection using the supplied command, if any. */ /* Setup the connection using the supplied command, if any. */
if (sa->initcmd) if (sa->initcmd)

View File

@ -172,8 +172,6 @@
{ amprocfamily => 'btree/interval_ops', amproclefttype => 'interval', { amprocfamily => 'btree/interval_ops', amproclefttype => 'interval',
amprocrighttype => 'interval', amprocnum => '3', amprocrighttype => 'interval', amprocnum => '3',
amproc => 'in_range(interval,interval,interval,bool,bool)' }, amproc => 'in_range(interval,interval,interval,bool,bool)' },
{ amprocfamily => 'btree/interval_ops', amproclefttype => 'interval',
amprocrighttype => 'interval', amprocnum => '4', amproc => 'btequalimage' },
{ amprocfamily => 'btree/macaddr_ops', amproclefttype => 'macaddr', { amprocfamily => 'btree/macaddr_ops', amproclefttype => 'macaddr',
amprocrighttype => 'macaddr', amprocnum => '1', amproc => 'macaddr_cmp' }, amprocrighttype => 'macaddr', amprocnum => '1', amproc => 'macaddr_cmp' },
{ amprocfamily => 'btree/macaddr_ops', amproclefttype => 'macaddr', { amprocfamily => 'btree/macaddr_ops', amproclefttype => 'macaddr',

View File

@ -50,7 +50,7 @@
opfmethod => 'btree', opfname => 'integer_ops' }, opfmethod => 'btree', opfname => 'integer_ops' },
{ oid => '1977', { oid => '1977',
opfmethod => 'hash', opfname => 'integer_ops' }, opfmethod => 'hash', opfname => 'integer_ops' },
{ oid => '1982', { oid => '1982', oid_symbol => 'INTERVAL_BTREE_FAM_OID',
opfmethod => 'btree', opfname => 'interval_ops' }, opfmethod => 'btree', opfname => 'interval_ops' },
{ oid => '1983', { oid => '1983',
opfmethod => 'hash', opfname => 'interval_ops' }, opfmethod => 'hash', opfname => 'interval_ops' },

View File

@ -2208,6 +2208,7 @@ ORDER BY 1, 2, 3;
| array_ops | array_ops | anyarray | array_ops | array_ops | anyarray
| float_ops | float4_ops | real | float_ops | float4_ops | real
| float_ops | float8_ops | double precision | float_ops | float8_ops | double precision
| interval_ops | interval_ops | interval
| jsonb_ops | jsonb_ops | jsonb | jsonb_ops | jsonb_ops | jsonb
| multirange_ops | multirange_ops | anymultirange | multirange_ops | multirange_ops | anymultirange
| numeric_ops | numeric_ops | numeric | numeric_ops | numeric_ops | numeric
@ -2216,7 +2217,7 @@ ORDER BY 1, 2, 3;
| record_ops | record_ops | record | record_ops | record_ops | record
| tsquery_ops | tsquery_ops | tsquery | tsquery_ops | tsquery_ops | tsquery
| tsvector_ops | tsvector_ops | tsvector | tsvector_ops | tsvector_ops | tsvector
(15 rows) (16 rows)
-- **************** pg_index **************** -- **************** pg_index ****************
-- Look for illegal values in pg_index fields. -- Look for illegal values in pg_index fields.