mirror of
https://github.com/postgres/postgres.git
synced 2025-07-22 00:01:40 -04:00
Compare commits
6 Commits
40d5e5981c
...
b690e5facb
Author | SHA1 | Date | |
---|---|---|---|
|
b690e5facb | ||
|
0bc726d95a | ||
|
7704a1a72e | ||
|
4b14e18714 | ||
|
a02b37fc08 | ||
|
7021d3b176 |
@ -4018,27 +4018,26 @@ EXECUTE st1(101, 101);
|
|||||||
00101 | 00101
|
00101 | 00101
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SET enable_hashjoin TO off;
|
||||||
|
SET enable_sort TO off;
|
||||||
-- subquery using stable function (can't be sent to remote)
|
-- subquery using stable function (can't be sent to remote)
|
||||||
PREPARE st2(int) AS SELECT * FROM ft1 t1 WHERE t1.c1 < $2 AND t1.c3 IN (SELECT c3 FROM ft2 t2 WHERE c1 > $1 AND date(c4) = '1970-01-17'::date) ORDER BY c1;
|
PREPARE st2(int) AS SELECT * FROM ft1 t1 WHERE t1.c1 < $2 AND t1.c3 IN (SELECT c3 FROM ft2 t2 WHERE c1 > $1 AND date(c4) = '1970-01-17'::date) ORDER BY c1;
|
||||||
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st2(10, 20);
|
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st2(10, 20);
|
||||||
QUERY PLAN
|
QUERY PLAN
|
||||||
----------------------------------------------------------------------------------------------------------
|
----------------------------------------------------------------------------------------------------------------------------------
|
||||||
Sort
|
Nested Loop Semi Join
|
||||||
Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
|
Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
|
||||||
Sort Key: t1.c1
|
Join Filter: (t2.c3 = t1.c3)
|
||||||
-> Nested Loop Semi Join
|
-> Foreign Scan on public.ft1 t1
|
||||||
Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
|
Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
|
||||||
Join Filter: (t2.c3 = t1.c3)
|
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 20)) ORDER BY "C 1" ASC NULLS LAST
|
||||||
-> Foreign Scan on public.ft1 t1
|
-> Materialize
|
||||||
Output: t1.c1, t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
|
Output: t2.c3
|
||||||
Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 20))
|
-> Foreign Scan on public.ft2 t2
|
||||||
-> Materialize
|
|
||||||
Output: t2.c3
|
Output: t2.c3
|
||||||
-> Foreign Scan on public.ft2 t2
|
Filter: (date(t2.c4) = '01-17-1970'::date)
|
||||||
Output: t2.c3
|
Remote SQL: SELECT c3, c4 FROM "S 1"."T 1" WHERE (("C 1" > 10))
|
||||||
Filter: (date(t2.c4) = '01-17-1970'::date)
|
(12 rows)
|
||||||
Remote SQL: SELECT c3, c4 FROM "S 1"."T 1" WHERE (("C 1" > 10))
|
|
||||||
(15 rows)
|
|
||||||
|
|
||||||
EXECUTE st2(10, 20);
|
EXECUTE st2(10, 20);
|
||||||
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
|
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
|
||||||
@ -4052,6 +4051,8 @@ EXECUTE st2(101, 121);
|
|||||||
116 | 6 | 00116 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
|
116 | 6 | 00116 | Sat Jan 17 00:00:00 1970 PST | Sat Jan 17 00:00:00 1970 | 6 | 6 | foo
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
RESET enable_hashjoin;
|
||||||
|
RESET enable_sort;
|
||||||
-- subquery using immutable function (can be sent to remote)
|
-- subquery using immutable function (can be sent to remote)
|
||||||
PREPARE st3(int) AS SELECT * FROM ft1 t1 WHERE t1.c1 < $2 AND t1.c3 IN (SELECT c3 FROM ft2 t2 WHERE c1 > $1 AND date(c5) = '1970-01-17'::date) ORDER BY c1;
|
PREPARE st3(int) AS SELECT * FROM ft1 t1 WHERE t1.c1 < $2 AND t1.c3 IN (SELECT c3 FROM ft2 t2 WHERE c1 > $1 AND date(c5) = '1970-01-17'::date) ORDER BY c1;
|
||||||
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st3(10, 20);
|
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st3(10, 20);
|
||||||
|
@ -1122,11 +1122,15 @@ PREPARE st1(int, int) AS SELECT t1.c3, t2.c3 FROM ft1 t1, ft2 t2 WHERE t1.c1 = $
|
|||||||
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st1(1, 2);
|
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st1(1, 2);
|
||||||
EXECUTE st1(1, 1);
|
EXECUTE st1(1, 1);
|
||||||
EXECUTE st1(101, 101);
|
EXECUTE st1(101, 101);
|
||||||
|
SET enable_hashjoin TO off;
|
||||||
|
SET enable_sort TO off;
|
||||||
-- subquery using stable function (can't be sent to remote)
|
-- subquery using stable function (can't be sent to remote)
|
||||||
PREPARE st2(int) AS SELECT * FROM ft1 t1 WHERE t1.c1 < $2 AND t1.c3 IN (SELECT c3 FROM ft2 t2 WHERE c1 > $1 AND date(c4) = '1970-01-17'::date) ORDER BY c1;
|
PREPARE st2(int) AS SELECT * FROM ft1 t1 WHERE t1.c1 < $2 AND t1.c3 IN (SELECT c3 FROM ft2 t2 WHERE c1 > $1 AND date(c4) = '1970-01-17'::date) ORDER BY c1;
|
||||||
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st2(10, 20);
|
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st2(10, 20);
|
||||||
EXECUTE st2(10, 20);
|
EXECUTE st2(10, 20);
|
||||||
EXECUTE st2(101, 121);
|
EXECUTE st2(101, 121);
|
||||||
|
RESET enable_hashjoin;
|
||||||
|
RESET enable_sort;
|
||||||
-- subquery using immutable function (can be sent to remote)
|
-- subquery using immutable function (can be sent to remote)
|
||||||
PREPARE st3(int) AS SELECT * FROM ft1 t1 WHERE t1.c1 < $2 AND t1.c3 IN (SELECT c3 FROM ft2 t2 WHERE c1 > $1 AND date(c5) = '1970-01-17'::date) ORDER BY c1;
|
PREPARE st3(int) AS SELECT * FROM ft1 t1 WHERE t1.c1 < $2 AND t1.c3 IN (SELECT c3 FROM ft2 t2 WHERE c1 > $1 AND date(c5) = '1970-01-17'::date) ORDER BY c1;
|
||||||
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st3(10, 20);
|
EXPLAIN (VERBOSE, COSTS OFF) EXECUTE st3(10, 20);
|
||||||
|
@ -2859,6 +2859,22 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
|
|||||||
</para></entry>
|
</para></entry>
|
||||||
</row>
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry role="func_table_entry"><para role="func_signature">
|
||||||
|
<indexterm>
|
||||||
|
<primary>unicode_assigned</primary>
|
||||||
|
</indexterm>
|
||||||
|
<function>unicode_assigned</function> ( <type>text</type> )
|
||||||
|
<returnvalue>boolean</returnvalue>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Returns <literal>true</literal> if all characters in the string are
|
||||||
|
assigned Unicode codepoints; <literal>false</literal> otherwise. This
|
||||||
|
function can only be used when the server encoding is
|
||||||
|
<literal>UTF8</literal>.
|
||||||
|
</para></entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
<row>
|
<row>
|
||||||
<entry role="func_table_entry"><para role="func_signature">
|
<entry role="func_table_entry"><para role="func_signature">
|
||||||
<indexterm>
|
<indexterm>
|
||||||
@ -23427,25 +23443,6 @@ SELECT * FROM pg_ls_dir('.') WITH ORDINALITY AS t(ls,n);
|
|||||||
This is equivalent to <function>current_user</function>.
|
This is equivalent to <function>current_user</function>.
|
||||||
</para></entry>
|
</para></entry>
|
||||||
</row>
|
</row>
|
||||||
|
|
||||||
<row>
|
|
||||||
<entry role="func_table_entry"><para role="func_signature">
|
|
||||||
<indexterm>
|
|
||||||
<primary>version</primary>
|
|
||||||
</indexterm>
|
|
||||||
<function>version</function> ()
|
|
||||||
<returnvalue>text</returnvalue>
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Returns a string describing the <productname>PostgreSQL</productname>
|
|
||||||
server's version. You can also get this information from
|
|
||||||
<xref linkend="guc-server-version"/>, or for a machine-readable
|
|
||||||
version use <xref linkend="guc-server-version-num"/>. Software
|
|
||||||
developers should use <varname>server_version_num</varname> (available
|
|
||||||
since 8.2) or <xref linkend="libpq-PQserverVersion"/> instead of
|
|
||||||
parsing the text version.
|
|
||||||
</para></entry>
|
|
||||||
</row>
|
|
||||||
</tbody>
|
</tbody>
|
||||||
</tgroup>
|
</tgroup>
|
||||||
</table>
|
</table>
|
||||||
@ -26332,6 +26329,80 @@ SELECT collation for ('foo' COLLATE "de_DE");
|
|||||||
|
|
||||||
</sect2>
|
</sect2>
|
||||||
|
|
||||||
|
<sect2 id="functions-info-version">
|
||||||
|
<title>Version Information Functions</title>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The functions shown in <xref linkend="functions-version"/>
|
||||||
|
print version information.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<table id="functions-version">
|
||||||
|
<title>Version Information Functions</title>
|
||||||
|
<tgroup cols="1">
|
||||||
|
<thead>
|
||||||
|
<row>
|
||||||
|
<entry role="func_table_entry"><para role="func_signature">
|
||||||
|
Function
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Description
|
||||||
|
</para></entry>
|
||||||
|
</row>
|
||||||
|
</thead>
|
||||||
|
|
||||||
|
<tbody>
|
||||||
|
<row>
|
||||||
|
<entry role="func_table_entry"><para role="func_signature">
|
||||||
|
<indexterm>
|
||||||
|
<primary>version</primary>
|
||||||
|
</indexterm>
|
||||||
|
<function>version</function> ()
|
||||||
|
<returnvalue>text</returnvalue>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Returns a string describing the <productname>PostgreSQL</productname>
|
||||||
|
server's version. You can also get this information from
|
||||||
|
<xref linkend="guc-server-version"/>, or for a machine-readable
|
||||||
|
version use <xref linkend="guc-server-version-num"/>. Software
|
||||||
|
developers should use <varname>server_version_num</varname> (available
|
||||||
|
since 8.2) or <xref linkend="libpq-PQserverVersion"/> instead of
|
||||||
|
parsing the text version.
|
||||||
|
</para></entry>
|
||||||
|
</row>
|
||||||
|
|
||||||
|
<row>
|
||||||
|
<entry role="func_table_entry"><para role="func_signature">
|
||||||
|
<indexterm>
|
||||||
|
<primary>unicode_version</primary>
|
||||||
|
</indexterm>
|
||||||
|
<function>unicode_version</function> ()
|
||||||
|
<returnvalue>text</returnvalue>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Returns a string representing the version of Unicode used by
|
||||||
|
<productname>PostgreSQL</productname>.
|
||||||
|
</para></entry>
|
||||||
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry role="func_table_entry"><para role="func_signature">
|
||||||
|
<indexterm>
|
||||||
|
<primary>icu_unicode_version</primary>
|
||||||
|
</indexterm>
|
||||||
|
<function>icu_unicode_version</function> ()
|
||||||
|
<returnvalue>text</returnvalue>
|
||||||
|
</para>
|
||||||
|
<para>
|
||||||
|
Returns a string representing the version of Unicode used by ICU, if
|
||||||
|
the server was built with ICU support; otherwise returns
|
||||||
|
<literal>NULL</literal>. </para></entry>
|
||||||
|
</row>
|
||||||
|
</tbody>
|
||||||
|
</tgroup>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
</sect2>
|
||||||
|
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
<sect1 id="functions-admin">
|
<sect1 id="functions-admin">
|
||||||
|
@ -23,7 +23,9 @@
|
|||||||
#include "catalog/pg_type.h"
|
#include "catalog/pg_type.h"
|
||||||
#include "common/hashfn.h"
|
#include "common/hashfn.h"
|
||||||
#include "common/int.h"
|
#include "common/int.h"
|
||||||
|
#include "common/unicode_category.h"
|
||||||
#include "common/unicode_norm.h"
|
#include "common/unicode_norm.h"
|
||||||
|
#include "common/unicode_version.h"
|
||||||
#include "funcapi.h"
|
#include "funcapi.h"
|
||||||
#include "lib/hyperloglog.h"
|
#include "lib/hyperloglog.h"
|
||||||
#include "libpq/pqformat.h"
|
#include "libpq/pqformat.h"
|
||||||
@ -6237,6 +6239,65 @@ unicode_norm_form_from_string(const char *formstr)
|
|||||||
return form;
|
return form;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns version of Unicode used by Postgres in "major.minor" format (the
|
||||||
|
* same format as the Unicode version reported by ICU). The third component
|
||||||
|
* ("update version") never involves additions to the character repertoire and
|
||||||
|
* is unimportant for most purposes.
|
||||||
|
*
|
||||||
|
* See: https://unicode.org/versions/
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
unicode_version(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
PG_RETURN_TEXT_P(cstring_to_text(PG_UNICODE_VERSION));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Returns version of Unicode used by ICU, if enabled; otherwise NULL.
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
icu_unicode_version(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
#ifdef USE_ICU
|
||||||
|
PG_RETURN_TEXT_P(cstring_to_text(U_UNICODE_VERSION));
|
||||||
|
#else
|
||||||
|
PG_RETURN_NULL();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check whether the string contains only assigned Unicode code
|
||||||
|
* points. Requires that the database encoding is UTF-8.
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
unicode_assigned(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
text *input = PG_GETARG_TEXT_PP(0);
|
||||||
|
unsigned char *p;
|
||||||
|
int size;
|
||||||
|
|
||||||
|
if (GetDatabaseEncoding() != PG_UTF8)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errmsg("Unicode categorization can only be performed if server encoding is UTF8")));
|
||||||
|
|
||||||
|
/* count the characters, then check the category of each decoded code point */
|
||||||
|
size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
|
||||||
|
p = (unsigned char *) VARDATA_ANY(input);
|
||||||
|
for (int i = 0; i < size; i++)
|
||||||
|
{
|
||||||
|
pg_wchar uchar = utf8_to_unicode(p);
|
||||||
|
int category = unicode_category(uchar);
|
||||||
|
|
||||||
|
if (category == PG_U_UNASSIGNED)
|
||||||
|
PG_RETURN_BOOL(false);
|
||||||
|
|
||||||
|
p += pg_utf_mblen(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
PG_RETURN_BOOL(true);
|
||||||
|
}
|
||||||
|
|
||||||
Datum
|
Datum
|
||||||
unicode_normalize_func(PG_FUNCTION_ARGS)
|
unicode_normalize_func(PG_FUNCTION_ARGS)
|
||||||
{
|
{
|
||||||
|
@ -1473,7 +1473,9 @@ check_GUC_init(struct config_generic *gconf)
|
|||||||
{
|
{
|
||||||
struct config_string *conf = (struct config_string *) gconf;
|
struct config_string *conf = (struct config_string *) gconf;
|
||||||
|
|
||||||
if (*conf->variable != NULL && strcmp(*conf->variable, conf->boot_val) != 0)
|
if (*conf->variable != NULL &&
|
||||||
|
(conf->boot_val == NULL ||
|
||||||
|
strcmp(*conf->variable, conf->boot_val) != 0))
|
||||||
{
|
{
|
||||||
elog(LOG, "GUC (PGC_STRING) %s, boot_val=%s, C-var=%s",
|
elog(LOG, "GUC (PGC_STRING) %s, boot_val=%s, C-var=%s",
|
||||||
conf->gen.name, conf->boot_val ? conf->boot_val : "<null>", *conf->variable);
|
conf->gen.name, conf->boot_val ? conf->boot_val : "<null>", *conf->variable);
|
||||||
@ -4213,8 +4215,7 @@ SetConfigOption(const char *name, const char *value,
|
|||||||
/*
|
/*
|
||||||
* Fetch the current value of the option `name', as a string.
|
* Fetch the current value of the option `name', as a string.
|
||||||
*
|
*
|
||||||
* If the option doesn't exist, return NULL if missing_ok is true (NOTE that
|
* If the option doesn't exist, return NULL if missing_ok is true,
|
||||||
* this cannot be distinguished from a string variable with a NULL value!),
|
|
||||||
* otherwise throw an ereport and don't return.
|
* otherwise throw an ereport and don't return.
|
||||||
*
|
*
|
||||||
* If restrict_privileged is true, we also enforce that only superusers and
|
* If restrict_privileged is true, we also enforce that only superusers and
|
||||||
@ -4257,7 +4258,8 @@ GetConfigOption(const char *name, bool missing_ok, bool restrict_privileged)
|
|||||||
return buffer;
|
return buffer;
|
||||||
|
|
||||||
case PGC_STRING:
|
case PGC_STRING:
|
||||||
return *((struct config_string *) record)->variable;
|
return *((struct config_string *) record)->variable ?
|
||||||
|
*((struct config_string *) record)->variable : "";
|
||||||
|
|
||||||
case PGC_ENUM:
|
case PGC_ENUM:
|
||||||
return config_enum_lookup_by_value((struct config_enum *) record,
|
return config_enum_lookup_by_value((struct config_enum *) record,
|
||||||
@ -4304,7 +4306,8 @@ GetConfigOptionResetString(const char *name)
|
|||||||
return buffer;
|
return buffer;
|
||||||
|
|
||||||
case PGC_STRING:
|
case PGC_STRING:
|
||||||
return ((struct config_string *) record)->reset_val;
|
return ((struct config_string *) record)->reset_val ?
|
||||||
|
((struct config_string *) record)->reset_val : "";
|
||||||
|
|
||||||
case PGC_ENUM:
|
case PGC_ENUM:
|
||||||
return config_enum_lookup_by_value((struct config_enum *) record,
|
return config_enum_lookup_by_value((struct config_enum *) record,
|
||||||
@ -5255,7 +5258,14 @@ get_explain_guc_options(int *num)
|
|||||||
{
|
{
|
||||||
struct config_string *lconf = (struct config_string *) conf;
|
struct config_string *lconf = (struct config_string *) conf;
|
||||||
|
|
||||||
modified = (strcmp(lconf->boot_val, *(lconf->variable)) != 0);
|
if (lconf->boot_val == NULL &&
|
||||||
|
*lconf->variable == NULL)
|
||||||
|
modified = false;
|
||||||
|
else if (lconf->boot_val == NULL ||
|
||||||
|
*lconf->variable == NULL)
|
||||||
|
modified = true;
|
||||||
|
else
|
||||||
|
modified = (strcmp(lconf->boot_val, *(lconf->variable)) != 0);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -5482,7 +5492,8 @@ write_one_nondefault_variable(FILE *fp, struct config_generic *gconf)
|
|||||||
{
|
{
|
||||||
struct config_string *conf = (struct config_string *) gconf;
|
struct config_string *conf = (struct config_string *) gconf;
|
||||||
|
|
||||||
fprintf(fp, "%s", *conf->variable);
|
if (*conf->variable)
|
||||||
|
fprintf(fp, "%s", *conf->variable);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -248,9 +248,17 @@ start_postmaster(ClusterInfo *cluster, bool report_and_exit_on_error)
|
|||||||
* invalidation of slots during the upgrade. We set this option when
|
* invalidation of slots during the upgrade. We set this option when
|
||||||
* cluster is PG17 or later because logical replication slots can only be
|
* cluster is PG17 or later because logical replication slots can only be
|
||||||
* migrated since then.  Besides, max_slot_wal_keep_size was added in PG13.
|
* migrated since then.  Besides, max_slot_wal_keep_size was added in PG13.
|
||||||
|
*
|
||||||
|
* Use max_logical_replication_workers as 0 to prevent a startup of the
|
||||||
|
* logical replication launcher while upgrading because it may start apply
|
||||||
|
* workers that could start receiving changes from the publisher before
|
||||||
|
* the physical files are put in place, causing corruption on the new
|
||||||
|
* cluster being upgraded to.  Like the previous parameter, this is set only
|
||||||
|
* when a cluster is PG17 or later as logical slots can only be migrated
|
||||||
|
* since this version.
|
||||||
*/
|
*/
|
||||||
if (GET_MAJOR_VERSION(cluster->major_version) >= 1700)
|
if (GET_MAJOR_VERSION(cluster->major_version) >= 1700)
|
||||||
appendPQExpBufferStr(&pgoptions, " -c max_slot_wal_keep_size=-1");
|
appendPQExpBufferStr(&pgoptions, " -c max_slot_wal_keep_size=-1 -c max_logical_replication_workers=0");
|
||||||
|
|
||||||
/* Use -b to disable autovacuum. */
|
/* Use -b to disable autovacuum. */
|
||||||
snprintf(cmd, sizeof(cmd),
|
snprintf(cmd, sizeof(cmd),
|
||||||
|
@ -78,6 +78,7 @@ OBJS_COMMON = \
|
|||||||
scram-common.o \
|
scram-common.o \
|
||||||
string.o \
|
string.o \
|
||||||
stringinfo.o \
|
stringinfo.o \
|
||||||
|
unicode_category.o \
|
||||||
unicode_norm.o \
|
unicode_norm.o \
|
||||||
username.o \
|
username.o \
|
||||||
wait_error.o \
|
wait_error.o \
|
||||||
|
@ -30,6 +30,7 @@ common_sources = files(
|
|||||||
'scram-common.c',
|
'scram-common.c',
|
||||||
'string.c',
|
'string.c',
|
||||||
'stringinfo.c',
|
'stringinfo.c',
|
||||||
|
'unicode_category.c',
|
||||||
'unicode_norm.c',
|
'unicode_norm.c',
|
||||||
'username.c',
|
'username.c',
|
||||||
'wait_error.c',
|
'wait_error.c',
|
||||||
|
@ -15,11 +15,15 @@ include $(top_builddir)/src/Makefile.global
|
|||||||
override CPPFLAGS := -DFRONTEND -I. $(CPPFLAGS)
|
override CPPFLAGS := -DFRONTEND -I. $(CPPFLAGS)
|
||||||
LIBS += $(PTHREAD_LIBS)
|
LIBS += $(PTHREAD_LIBS)
|
||||||
|
|
||||||
|
LDFLAGS_INTERNAL += $(ICU_LIBS)
|
||||||
|
CPPFLAGS += $(ICU_CFLAGS)
|
||||||
|
|
||||||
# By default, do nothing.
|
# By default, do nothing.
|
||||||
all:
|
all:
|
||||||
|
|
||||||
update-unicode: unicode_norm_table.h unicode_nonspacing_table.h unicode_east_asian_fw_table.h unicode_normprops_table.h unicode_norm_hashfunc.h
|
update-unicode: unicode_category_table.h unicode_norm_table.h unicode_nonspacing_table.h unicode_east_asian_fw_table.h unicode_normprops_table.h unicode_norm_hashfunc.h unicode_version.h
|
||||||
mv $^ $(top_srcdir)/src/include/common/
|
mv $^ $(top_srcdir)/src/include/common/
|
||||||
|
$(MAKE) category-check
|
||||||
$(MAKE) normalization-check
|
$(MAKE) normalization-check
|
||||||
|
|
||||||
# These files are part of the Unicode Character Database. Download
|
# These files are part of the Unicode Character Database. Download
|
||||||
@ -28,6 +32,12 @@ update-unicode: unicode_norm_table.h unicode_nonspacing_table.h unicode_east_asi
|
|||||||
UnicodeData.txt EastAsianWidth.txt DerivedNormalizationProps.txt CompositionExclusions.txt NormalizationTest.txt: $(top_builddir)/src/Makefile.global
|
UnicodeData.txt EastAsianWidth.txt DerivedNormalizationProps.txt CompositionExclusions.txt NormalizationTest.txt: $(top_builddir)/src/Makefile.global
|
||||||
$(DOWNLOAD) https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$(@F)
|
$(DOWNLOAD) https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/$(@F)
|
||||||
|
|
||||||
|
unicode_version.h: generate-unicode_version.pl
|
||||||
|
$(PERL) $< --version $(UNICODE_VERSION)
|
||||||
|
|
||||||
|
unicode_category_table.h: generate-unicode_category_table.pl UnicodeData.txt
|
||||||
|
$(PERL) $<
|
||||||
|
|
||||||
# Generation of conversion tables used for string normalization with
|
# Generation of conversion tables used for string normalization with
|
||||||
# UTF-8 strings.
|
# UTF-8 strings.
|
||||||
unicode_norm_hashfunc.h: unicode_norm_table.h
|
unicode_norm_hashfunc.h: unicode_norm_table.h
|
||||||
@ -45,9 +55,14 @@ unicode_normprops_table.h: generate-unicode_normprops_table.pl DerivedNormalizat
|
|||||||
$(PERL) $^ >$@
|
$(PERL) $^ >$@
|
||||||
|
|
||||||
# Test suite
|
# Test suite
|
||||||
|
category-check: category_test
|
||||||
|
./category_test
|
||||||
|
|
||||||
normalization-check: norm_test
|
normalization-check: norm_test
|
||||||
./norm_test
|
./norm_test
|
||||||
|
|
||||||
|
category_test: category_test.o ../unicode_category.o | submake-common
|
||||||
|
|
||||||
norm_test: norm_test.o ../unicode_norm.o | submake-common
|
norm_test: norm_test.o ../unicode_norm.o | submake-common
|
||||||
|
|
||||||
norm_test.o: norm_test_table.h
|
norm_test.o: norm_test_table.h
|
||||||
@ -64,7 +79,7 @@ norm_test_table.h: generate-norm_test_table.pl NormalizationTest.txt
|
|||||||
|
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
rm -f $(OBJS) norm_test norm_test.o
|
rm -f $(OBJS) category_test category_test.o norm_test norm_test.o
|
||||||
|
|
||||||
distclean: clean
|
distclean: clean
|
||||||
rm -f UnicodeData.txt EastAsianWidth.txt CompositionExclusions.txt NormalizationTest.txt norm_test_table.h unicode_norm_table.h
|
rm -f UnicodeData.txt EastAsianWidth.txt CompositionExclusions.txt NormalizationTest.txt norm_test_table.h unicode_norm_table.h
|
||||||
|
108
src/common/unicode/category_test.c
Normal file
108
src/common/unicode/category_test.c
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
* category_test.c
|
||||||
|
* Program to test Unicode general category functions.
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 2017-2023, PostgreSQL Global Development Group
|
||||||
|
*
|
||||||
|
* IDENTIFICATION
|
||||||
|
* src/common/unicode/category_test.c
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#include "postgres_fe.h"
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#ifdef USE_ICU
|
||||||
|
#include <unicode/uchar.h>
|
||||||
|
#endif
|
||||||
|
#include "common/unicode_category.h"
|
||||||
|
#include "common/unicode_version.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Parse version into integer for easy comparison.
|
||||||
|
*/
|
||||||
|
#ifdef USE_ICU
|
||||||
|
static int
|
||||||
|
parse_unicode_version(const char *version)
|
||||||
|
{
|
||||||
|
int n,
|
||||||
|
major,
|
||||||
|
minor;
|
||||||
|
|
||||||
|
n = sscanf(version, "%d.%d", &major, &minor);
|
||||||
|
|
||||||
|
Assert(n == 2);
|
||||||
|
Assert(minor < 100);
|
||||||
|
|
||||||
|
return major * 100 + minor;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Exhaustively test that the Unicode category for each codepoint matches that
|
||||||
|
* returned by ICU.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
#ifdef USE_ICU
|
||||||
|
int pg_unicode_version = parse_unicode_version(PG_UNICODE_VERSION);
|
||||||
|
int icu_unicode_version = parse_unicode_version(U_UNICODE_VERSION);
|
||||||
|
int pg_skipped_codepoints = 0;
|
||||||
|
int icu_skipped_codepoints = 0;
|
||||||
|
|
||||||
|
printf("Postgres Unicode Version:\t%s\n", PG_UNICODE_VERSION);
|
||||||
|
printf("ICU Unicode Version:\t\t%s\n", U_UNICODE_VERSION);
|
||||||
|
|
||||||
|
for (UChar32 code = 0; code <= 0x10ffff; code++)
|
||||||
|
{
|
||||||
|
uint8_t pg_category = unicode_category(code);
|
||||||
|
uint8_t icu_category = u_charType(code);
|
||||||
|
|
||||||
|
if (pg_category != icu_category)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* A version mismatch means that some assigned codepoints in the
|
||||||
|
* newer version may be unassigned in the older version. That's
|
||||||
|
* OK, though the test will not cover those codepoints marked
|
||||||
|
* unassigned in the older version (that is, it will no longer be
|
||||||
|
* an exhaustive test).
|
||||||
|
*/
|
||||||
|
if (pg_category == PG_U_UNASSIGNED &&
|
||||||
|
pg_unicode_version < icu_unicode_version)
|
||||||
|
pg_skipped_codepoints++;
|
||||||
|
else if (icu_category == PG_U_UNASSIGNED &&
|
||||||
|
icu_unicode_version < pg_unicode_version)
|
||||||
|
icu_skipped_codepoints++;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
printf("FAILURE for codepoint %06x\n", code);
|
||||||
|
printf("Postgres category: %02d %s %s\n", pg_category,
|
||||||
|
unicode_category_abbrev(pg_category),
|
||||||
|
unicode_category_string(pg_category));
|
||||||
|
printf("ICU category: %02d %s %s\n", icu_category,
|
||||||
|
unicode_category_abbrev(icu_category),
|
||||||
|
unicode_category_string(icu_category));
|
||||||
|
printf("\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pg_skipped_codepoints > 0)
|
||||||
|
printf("Skipped %d codepoints unassigned in Postgres due to Unicode version mismatch.\n",
|
||||||
|
pg_skipped_codepoints);
|
||||||
|
if (icu_skipped_codepoints > 0)
|
||||||
|
printf("Skipped %d codepoints unassigned in ICU due to Unicode version mismatch.\n",
|
||||||
|
icu_skipped_codepoints);
|
||||||
|
|
||||||
|
printf("category_test: All tests successful!\n");
|
||||||
|
exit(0);
|
||||||
|
#else
|
||||||
|
printf("ICU support required for test; skipping.\n");
|
||||||
|
exit(0);
|
||||||
|
#endif
|
||||||
|
}
|
204
src/common/unicode/generate-unicode_category_table.pl
Normal file
204
src/common/unicode/generate-unicode_category_table.pl
Normal file
@ -0,0 +1,204 @@
|
|||||||
|
#!/usr/bin/perl
|
||||||
|
#
|
||||||
|
# Generate a code point category table and its lookup utilities, using
|
||||||
|
# Unicode data files as input.
|
||||||
|
#
|
||||||
|
# Input: UnicodeData.txt
|
||||||
|
# Output: unicode_category_table.h
|
||||||
|
#
|
||||||
|
# Copyright (c) 2000-2023, PostgreSQL Global Development Group
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use Getopt::Long;
|
||||||
|
|
||||||
|
use FindBin;
|
||||||
|
use lib "$FindBin::RealBin/../../tools/";
|
||||||
|
|
||||||
|
my $CATEGORY_UNASSIGNED = 'Cn';
|
||||||
|
|
||||||
|
my $output_path = '.';
|
||||||
|
|
||||||
|
GetOptions('outdir:s' => \$output_path);
|
||||||
|
|
||||||
|
my $output_table_file = "$output_path/unicode_category_table.h";
|
||||||
|
|
||||||
|
my $FH;
|
||||||
|
|
||||||
|
# Read entries from UnicodeData.txt into a list of codepoint ranges
|
||||||
|
# and their general category.
|
||||||
|
my @category_ranges = ();
|
||||||
|
my $range_start = undef;
|
||||||
|
my $range_end = undef;
|
||||||
|
my $range_category = undef;
|
||||||
|
|
||||||
|
# If between a "<..., First>" entry and a "<..., Last>" entry, the gap in
|
||||||
|
# codepoints represents a range, and $gap_category is equal to the
|
||||||
|
# category for both (which must match). Otherwise, the gap represents
|
||||||
|
# unassigned code points.
|
||||||
|
my $gap_category = undef;
|
||||||
|
|
||||||
|
open($FH, '<', "$output_path/UnicodeData.txt")
|
||||||
|
or die "Could not open $output_path/UnicodeData.txt: $!.";
|
||||||
|
while (my $line = <$FH>)
|
||||||
|
{
|
||||||
|
my @elts = split(';', $line);
|
||||||
|
my $code = hex($elts[0]);
|
||||||
|
my $name = $elts[1];
|
||||||
|
my $category = $elts[2];
|
||||||
|
|
||||||
|
die "codepoint out of range" if $code > 0x10FFFF;
|
||||||
|
die "unassigned codepoint in UnicodeData.txt" if $category eq $CATEGORY_UNASSIGNED;
|
||||||
|
|
||||||
|
if (!defined($range_start)) {
|
||||||
|
my $code_str = sprintf "0x%06x", $code;
|
||||||
|
die if defined($range_end) || defined($range_category) || defined($gap_category);
|
||||||
|
die "unexpected first entry <..., Last>" if ($name =~ /Last>/);
|
||||||
|
die "expected 0x000000 for first entry, got $code_str" if $code != 0x000000;
|
||||||
|
|
||||||
|
# initialize
|
||||||
|
$range_start = $code;
|
||||||
|
$range_end = $code;
|
||||||
|
$range_category = $category;
|
||||||
|
if ($name =~ /<.*, First>$/) {
|
||||||
|
$gap_category = $category;
|
||||||
|
} else {
|
||||||
|
$gap_category = $CATEGORY_UNASSIGNED;
|
||||||
|
}
|
||||||
|
next;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Gap in codepoints detected. If it's a different category than
|
||||||
|
# the current range, emit the current range and initialize a new
|
||||||
|
# range representing the gap.
|
||||||
|
if ($range_end + 1 != $code && $range_category ne $gap_category) {
|
||||||
|
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
|
||||||
|
$range_start = $range_end + 1;
|
||||||
|
$range_end = $code - 1;
|
||||||
|
$range_category = $gap_category;
|
||||||
|
}
|
||||||
|
|
||||||
|
# different category; new range
|
||||||
|
if ($range_category ne $category) {
|
||||||
|
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
|
||||||
|
$range_start = $code;
|
||||||
|
$range_end = $code;
|
||||||
|
$range_category = $category;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($name =~ /<.*, First>$/) {
|
||||||
|
die "<..., First> entry unexpectedly follows another <..., First> entry"
|
||||||
|
if $gap_category ne $CATEGORY_UNASSIGNED;
|
||||||
|
$gap_category = $category;
|
||||||
|
}
|
||||||
|
elsif ($name =~ /<.*, Last>$/) {
|
||||||
|
die "<..., First> and <..., Last> entries have mismatching general category"
|
||||||
|
if $gap_category ne $category;
|
||||||
|
$gap_category = $CATEGORY_UNASSIGNED;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
die "unexpected entry found between <..., First> and <..., Last>"
|
||||||
|
if $gap_category ne $CATEGORY_UNASSIGNED;
|
||||||
|
}
|
||||||
|
|
||||||
|
$range_end = $code;
|
||||||
|
}
|
||||||
|
close $FH;
|
||||||
|
|
||||||
|
die "<..., First> entry with no corresponding <..., Last> entry"
|
||||||
|
if $gap_category ne $CATEGORY_UNASSIGNED;
|
||||||
|
|
||||||
|
# emit final range
|
||||||
|
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
|
||||||
|
|
||||||
|
# emit range for any unassigned code points after last entry
|
||||||
|
if ($range_end < 0x10FFFF) {
|
||||||
|
$range_start = $range_end + 1;
|
||||||
|
$range_end = 0x10FFFF;
|
||||||
|
$range_category = $CATEGORY_UNASSIGNED;
|
||||||
|
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
|
||||||
|
}
|
||||||
|
|
||||||
|
my $num_ranges = scalar @category_ranges;
|
||||||
|
|
||||||
|
# See: https://www.unicode.org/reports/tr44/#General_Category_Values
|
||||||
|
my $categories = {
|
||||||
|
Cn => 'PG_U_UNASSIGNED',
|
||||||
|
Lu => 'PG_U_UPPERCASE_LETTER',
|
||||||
|
Ll => 'PG_U_LOWERCASE_LETTER',
|
||||||
|
Lt => 'PG_U_TITLECASE_LETTER',
|
||||||
|
Lm => 'PG_U_MODIFIER_LETTER',
|
||||||
|
Lo => 'PG_U_OTHER_LETTER',
|
||||||
|
Mn => 'PG_U_NONSPACING_MARK',
|
||||||
|
Me => 'PG_U_ENCLOSING_MARK',
|
||||||
|
Mc => 'PG_U_SPACING_MARK',
|
||||||
|
Nd => 'PG_U_DECIMAL_NUMBER',
|
||||||
|
Nl => 'PG_U_LETTER_NUMBER',
|
||||||
|
No => 'PG_U_OTHER_NUMBER',
|
||||||
|
Zs => 'PG_U_SPACE_SEPARATOR',
|
||||||
|
Zl => 'PG_U_LINE_SEPARATOR',
|
||||||
|
Zp => 'PG_U_PARAGRAPH_SEPARATOR',
|
||||||
|
Cc => 'PG_U_CONTROL',
|
||||||
|
Cf => 'PG_U_FORMAT',
|
||||||
|
Co => 'PG_U_PRIVATE_USE',
|
||||||
|
Cs => 'PG_U_SURROGATE',
|
||||||
|
Pd => 'PG_U_DASH_PUNCTUATION',
|
||||||
|
Ps => 'PG_U_OPEN_PUNCTUATION',
|
||||||
|
Pe => 'PG_U_CLOSE_PUNCTUATION',
|
||||||
|
Pc => 'PG_U_CONNECTOR_PUNCTUATION',
|
||||||
|
Po => 'PG_U_OTHER_PUNCTUATION',
|
||||||
|
Sm => 'PG_U_MATH_SYMBOL',
|
||||||
|
Sc => 'PG_U_CURRENCY_SYMBOL',
|
||||||
|
Sk => 'PG_U_MODIFIER_SYMBOL',
|
||||||
|
So => 'PG_U_OTHER_SYMBOL',
|
||||||
|
Pi => 'PG_U_INITIAL_PUNCTUATION',
|
||||||
|
Pf => 'PG_U_FINAL_PUNCTUATION'
|
||||||
|
};
|
||||||
|
|
||||||
|
# Start writing out the output files
|
||||||
|
open my $OT, '>', $output_table_file
|
||||||
|
or die "Could not open output file $output_table_file: $!\n";
|
||||||
|
|
||||||
|
print $OT <<HEADER;
|
||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* unicode_category_table.h
|
||||||
|
* Category table for Unicode character classification.
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
|
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
*
|
||||||
|
* src/include/common/unicode_category_table.h
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "common/unicode_category.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* File auto-generated by src/common/unicode/generate-unicode_category_table.pl,
|
||||||
|
* do not edit. There is deliberately not an #ifndef PG_UNICODE_CATEGORY_TABLE_H
|
||||||
|
* here.
|
||||||
|
*/
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
uint32 first; /* Unicode codepoint */
|
||||||
|
uint32 last; /* Unicode codepoint */
|
||||||
|
uint8 category; /* General Category */
|
||||||
|
} pg_category_range;
|
||||||
|
|
||||||
|
/* table of Unicode codepoint ranges and their categories */
|
||||||
|
static const pg_category_range unicode_categories[$num_ranges] =
|
||||||
|
{
|
||||||
|
HEADER
|
||||||
|
|
||||||
|
# Emit one "{first, last, category}" initializer per range.  The ",\n"
# separator is written ahead of every row except the first, so the last row
# is not followed by a comma before the closing brace.
my $separator = '';
foreach my $range (@category_ranges)
{
	printf $OT '%s', $separator;
	$separator = ",\n";

	# Map the category code stored in the range to its PG_U_* enum name;
	# an unmapped code aborts the generator.
	my $enum_name = $categories->{ $range->{category} }
	  or die "category missing: $range->{category}";

	printf $OT "\t{0x%06x, 0x%06x, %s}", @{$range}{qw(start end)},
	  $enum_name;
}

# Close the array initializer.
print $OT "\n};\n";
|
46
src/common/unicode/generate-unicode_version.pl
Normal file
46
src/common/unicode/generate-unicode_version.pl
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
#!/usr/bin/perl
|
||||||
|
#
|
||||||
|
# Generate header file with Unicode version used by Postgres.
|
||||||
|
#
|
||||||
|
# Output: unicode_version.h
|
||||||
|
#
|
||||||
|
# Copyright (c) 2000-2023, PostgreSQL Global Development Group
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use Getopt::Long;
|
||||||
|
|
||||||
|
use FindBin;
|
||||||
|
use lib "$FindBin::RealBin/../../tools/";
|
||||||
|
|
||||||
|
# Directory that receives unicode_version.h (defaults to the current
# directory) and the full Unicode version string, e.g. "15.1.0".
my $output_path = '.';
my $version_str = undef;

# Reject unknown or malformed options instead of silently carrying on.
GetOptions('outdir:s' => \$output_path, 'version:s' => \$version_str)
  or die "Usage: $0 [--outdir DIR] --version MAJOR.MINOR[.UPDATE]\n";

# Only the major and minor components end up in PG_UNICODE_VERSION.  Insist
# that both are present: without this check a missing or malformed --version
# would merely raise warnings and generate a header claiming version "0.0".
my ($major, $minor) =
  defined $version_str ? $version_str =~ /^(\d+)\.(\d+)/ : ();
die "$0: --version must be given as MAJOR.MINOR[.UPDATE]\n"
  unless defined $minor;

# %d normalizes the components (e.g. strips leading zeros).
my $unicode_version_str = sprintf "%d.%d", $major, $minor;
|
||||||
|
|
||||||
|
my $output_file = "$output_path/unicode_version.h";
|
||||||
|
|
||||||
|
# Start writing out the output files
|
||||||
|
open my $OT, '>', $output_file
|
||||||
|
or die "Could not open output file $output_file: $!\n";
|
||||||
|
|
||||||
|
print $OT <<HEADER;
|
||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* unicode_version.h
|
||||||
|
* Unicode version used by Postgres.
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
|
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
*
|
||||||
|
* src/include/common/unicode_version.h
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define PG_UNICODE_VERSION "$unicode_version_str"
|
||||||
|
HEADER
|
@ -24,6 +24,25 @@ endforeach
|
|||||||
|
|
||||||
update_unicode_targets = []
|
update_unicode_targets = []
|
||||||
|
|
||||||
|
update_unicode_targets += \
|
||||||
|
custom_target('unicode_version.h',
|
||||||
|
output: ['unicode_version.h'],
|
||||||
|
command: [
|
||||||
|
perl, files('generate-unicode_version.pl'),
|
||||||
|
'--outdir', '@OUTDIR@', '--version', UNICODE_VERSION],
|
||||||
|
build_by_default: false,
|
||||||
|
)
|
||||||
|
|
||||||
|
update_unicode_targets += \
|
||||||
|
custom_target('unicode_category_table.h',
|
||||||
|
input: [unicode_data['UnicodeData.txt']],
|
||||||
|
output: ['unicode_category_table.h'],
|
||||||
|
command: [
|
||||||
|
perl, files('generate-unicode_category_table.pl'),
|
||||||
|
'--outdir', '@OUTDIR@', '@INPUT@'],
|
||||||
|
build_by_default: false,
|
||||||
|
)
|
||||||
|
|
||||||
update_unicode_targets += \
|
update_unicode_targets += \
|
||||||
custom_target('unicode_norm_table.h',
|
custom_target('unicode_norm_table.h',
|
||||||
input: [unicode_data['UnicodeData.txt'], unicode_data['CompositionExclusions.txt']],
|
input: [unicode_data['UnicodeData.txt'], unicode_data['CompositionExclusions.txt']],
|
||||||
@ -73,6 +92,17 @@ norm_test_table = custom_target('norm_test_table.h',
|
|||||||
|
|
||||||
inc = include_directories('.')
|
inc = include_directories('.')
|
||||||
|
|
||||||
|
category_test = executable('category_test',
|
||||||
|
['category_test.c'],
|
||||||
|
dependencies: [frontend_port_code, icu],
|
||||||
|
include_directories: inc,
|
||||||
|
link_with: [common_static, pgport_static],
|
||||||
|
build_by_default: false,
|
||||||
|
kwargs: default_bin_args + {
|
||||||
|
'install': false,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
norm_test = executable('norm_test',
|
norm_test = executable('norm_test',
|
||||||
['norm_test.c', norm_test_table],
|
['norm_test.c', norm_test_table],
|
||||||
dependencies: [frontend_port_code],
|
dependencies: [frontend_port_code],
|
||||||
@ -86,6 +116,16 @@ norm_test = executable('norm_test',
|
|||||||
|
|
||||||
update_unicode_dep = []
|
update_unicode_dep = []
|
||||||
|
|
||||||
|
if not meson.is_cross_build()
|
||||||
|
update_unicode_dep += custom_target('category_test.run',
|
||||||
|
output: 'category_test.run',
|
||||||
|
input: update_unicode_targets,
|
||||||
|
command: [category_test, UNICODE_VERSION],
|
||||||
|
build_by_default: false,
|
||||||
|
build_always_stale: true,
|
||||||
|
)
|
||||||
|
endif
|
||||||
|
|
||||||
if not meson.is_cross_build()
|
if not meson.is_cross_build()
|
||||||
update_unicode_dep += custom_target('norm_test.run',
|
update_unicode_dep += custom_target('norm_test.run',
|
||||||
output: 'norm_test.run',
|
output: 'norm_test.run',
|
||||||
|
@ -81,6 +81,6 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
printf("All tests successful!\n");
|
printf("norm_test: All tests successful!\n");
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
195
src/common/unicode_category.c
Normal file
195
src/common/unicode_category.c
Normal file
@ -0,0 +1,195 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
* unicode_category.c
|
||||||
|
* Determine general category of Unicode characters.
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 2017-2023, PostgreSQL Global Development Group
|
||||||
|
*
|
||||||
|
* IDENTIFICATION
|
||||||
|
* src/common/unicode_category.c
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#ifndef FRONTEND
|
||||||
|
#include "postgres.h"
|
||||||
|
#else
|
||||||
|
#include "postgres_fe.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "common/unicode_category.h"
|
||||||
|
#include "common/unicode_category_table.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Unicode general category for the given codepoint.
|
||||||
|
*/
|
||||||
|
pg_unicode_category
|
||||||
|
unicode_category(pg_wchar ucs)
|
||||||
|
{
|
||||||
|
int min = 0;
|
||||||
|
int mid;
|
||||||
|
int max = lengthof(unicode_categories) - 1;
|
||||||
|
|
||||||
|
Assert(ucs >= unicode_categories[0].first &&
|
||||||
|
ucs <= unicode_categories[max].last);
|
||||||
|
|
||||||
|
while (max >= min)
|
||||||
|
{
|
||||||
|
mid = (min + max) / 2;
|
||||||
|
if (ucs > unicode_categories[mid].last)
|
||||||
|
min = mid + 1;
|
||||||
|
else if (ucs < unicode_categories[mid].first)
|
||||||
|
max = mid - 1;
|
||||||
|
else
|
||||||
|
return unicode_categories[mid].category;
|
||||||
|
}
|
||||||
|
|
||||||
|
Assert(false);
|
||||||
|
return (pg_unicode_category) - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Description of Unicode general category.
|
||||||
|
*/
|
||||||
|
const char *
|
||||||
|
unicode_category_string(pg_unicode_category category)
|
||||||
|
{
|
||||||
|
switch (category)
|
||||||
|
{
|
||||||
|
case PG_U_UNASSIGNED:
|
||||||
|
return "Unassigned";
|
||||||
|
case PG_U_UPPERCASE_LETTER:
|
||||||
|
return "Uppercase_Letter";
|
||||||
|
case PG_U_LOWERCASE_LETTER:
|
||||||
|
return "Lowercase_Letter";
|
||||||
|
case PG_U_TITLECASE_LETTER:
|
||||||
|
return "Titlecase_Letter";
|
||||||
|
case PG_U_MODIFIER_LETTER:
|
||||||
|
return "Modifier_Letter";
|
||||||
|
case PG_U_OTHER_LETTER:
|
||||||
|
return "Other_Letter";
|
||||||
|
case PG_U_NONSPACING_MARK:
|
||||||
|
return "Nonspacing_Mark";
|
||||||
|
case PG_U_ENCLOSING_MARK:
|
||||||
|
return "Enclosing_Mark";
|
||||||
|
case PG_U_SPACING_MARK:
|
||||||
|
return "Spacing_Mark";
|
||||||
|
case PG_U_DECIMAL_NUMBER:
|
||||||
|
return "Decimal_Number";
|
||||||
|
case PG_U_LETTER_NUMBER:
|
||||||
|
return "Letter_Number";
|
||||||
|
case PG_U_OTHER_NUMBER:
|
||||||
|
return "Other_Number";
|
||||||
|
case PG_U_SPACE_SEPARATOR:
|
||||||
|
return "Space_Separator";
|
||||||
|
case PG_U_LINE_SEPARATOR:
|
||||||
|
return "Line_Separator";
|
||||||
|
case PG_U_PARAGRAPH_SEPARATOR:
|
||||||
|
return "Paragraph_Separator";
|
||||||
|
case PG_U_CONTROL:
|
||||||
|
return "Control";
|
||||||
|
case PG_U_FORMAT:
|
||||||
|
return "Format";
|
||||||
|
case PG_U_PRIVATE_USE:
|
||||||
|
return "Private_Use";
|
||||||
|
case PG_U_SURROGATE:
|
||||||
|
return "Surrogate";
|
||||||
|
case PG_U_DASH_PUNCTUATION:
|
||||||
|
return "Dash_Punctuation";
|
||||||
|
case PG_U_OPEN_PUNCTUATION:
|
||||||
|
return "Open_Punctuation";
|
||||||
|
case PG_U_CLOSE_PUNCTUATION:
|
||||||
|
return "Close_Punctuation";
|
||||||
|
case PG_U_CONNECTOR_PUNCTUATION:
|
||||||
|
return "Connector_Punctuation";
|
||||||
|
case PG_U_OTHER_PUNCTUATION:
|
||||||
|
return "Other_Punctuation";
|
||||||
|
case PG_U_MATH_SYMBOL:
|
||||||
|
return "Math_Symbol";
|
||||||
|
case PG_U_CURRENCY_SYMBOL:
|
||||||
|
return "Currency_Symbol";
|
||||||
|
case PG_U_MODIFIER_SYMBOL:
|
||||||
|
return "Modifier_Symbol";
|
||||||
|
case PG_U_OTHER_SYMBOL:
|
||||||
|
return "Other_Symbol";
|
||||||
|
case PG_U_INITIAL_PUNCTUATION:
|
||||||
|
return "Initial_Punctuation";
|
||||||
|
case PG_U_FINAL_PUNCTUATION:
|
||||||
|
return "Final_Punctuation";
|
||||||
|
}
|
||||||
|
|
||||||
|
Assert(false);
|
||||||
|
return "Unrecognized"; /* keep compiler quiet */
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Short code for Unicode general category.
|
||||||
|
*/
|
||||||
|
const char *
|
||||||
|
unicode_category_abbrev(pg_unicode_category category)
|
||||||
|
{
|
||||||
|
switch (category)
|
||||||
|
{
|
||||||
|
case PG_U_UNASSIGNED:
|
||||||
|
return "Cn";
|
||||||
|
case PG_U_UPPERCASE_LETTER:
|
||||||
|
return "Lu";
|
||||||
|
case PG_U_LOWERCASE_LETTER:
|
||||||
|
return "Ll";
|
||||||
|
case PG_U_TITLECASE_LETTER:
|
||||||
|
return "Lt";
|
||||||
|
case PG_U_MODIFIER_LETTER:
|
||||||
|
return "Lm";
|
||||||
|
case PG_U_OTHER_LETTER:
|
||||||
|
return "Lo";
|
||||||
|
case PG_U_NONSPACING_MARK:
|
||||||
|
return "Mn";
|
||||||
|
case PG_U_ENCLOSING_MARK:
|
||||||
|
return "Me";
|
||||||
|
case PG_U_SPACING_MARK:
|
||||||
|
return "Mc";
|
||||||
|
case PG_U_DECIMAL_NUMBER:
|
||||||
|
return "Nd";
|
||||||
|
case PG_U_LETTER_NUMBER:
|
||||||
|
return "Nl";
|
||||||
|
case PG_U_OTHER_NUMBER:
|
||||||
|
return "No";
|
||||||
|
case PG_U_SPACE_SEPARATOR:
|
||||||
|
return "Zs";
|
||||||
|
case PG_U_LINE_SEPARATOR:
|
||||||
|
return "Zl";
|
||||||
|
case PG_U_PARAGRAPH_SEPARATOR:
|
||||||
|
return "Zp";
|
||||||
|
case PG_U_CONTROL:
|
||||||
|
return "Cc";
|
||||||
|
case PG_U_FORMAT:
|
||||||
|
return "Cf";
|
||||||
|
case PG_U_PRIVATE_USE:
|
||||||
|
return "Co";
|
||||||
|
case PG_U_SURROGATE:
|
||||||
|
return "Cs";
|
||||||
|
case PG_U_DASH_PUNCTUATION:
|
||||||
|
return "Pd";
|
||||||
|
case PG_U_OPEN_PUNCTUATION:
|
||||||
|
return "Ps";
|
||||||
|
case PG_U_CLOSE_PUNCTUATION:
|
||||||
|
return "Pe";
|
||||||
|
case PG_U_CONNECTOR_PUNCTUATION:
|
||||||
|
return "Pc";
|
||||||
|
case PG_U_OTHER_PUNCTUATION:
|
||||||
|
return "Po";
|
||||||
|
case PG_U_MATH_SYMBOL:
|
||||||
|
return "Sm";
|
||||||
|
case PG_U_CURRENCY_SYMBOL:
|
||||||
|
return "Sc";
|
||||||
|
case PG_U_MODIFIER_SYMBOL:
|
||||||
|
return "Sk";
|
||||||
|
case PG_U_OTHER_SYMBOL:
|
||||||
|
return "So";
|
||||||
|
case PG_U_INITIAL_PUNCTUATION:
|
||||||
|
return "Pi";
|
||||||
|
case PG_U_FINAL_PUNCTUATION:
|
||||||
|
return "Pf";
|
||||||
|
}
|
||||||
|
|
||||||
|
Assert(false);
|
||||||
|
return "??"; /* keep compiler quiet */
|
||||||
|
}
|
@ -12019,6 +12019,18 @@
|
|||||||
proname => 'pg_partition_root', prorettype => 'regclass',
|
proname => 'pg_partition_root', prorettype => 'regclass',
|
||||||
proargtypes => 'regclass', prosrc => 'pg_partition_root' },
|
proargtypes => 'regclass', prosrc => 'pg_partition_root' },
|
||||||
|
|
||||||
|
{ oid => '4549', descr => 'Unicode version used by Postgres',
|
||||||
|
proname => 'unicode_version', prorettype => 'text', proargtypes => '',
|
||||||
|
prosrc => 'unicode_version' },
|
||||||
|
|
||||||
|
{ oid => '6099', descr => 'Unicode version used by ICU, if enabled',
|
||||||
|
proname => 'icu_unicode_version', prorettype => 'text', proargtypes => '',
|
||||||
|
prosrc => 'icu_unicode_version' },
|
||||||
|
|
||||||
|
{ oid => '6105', descr => 'check valid Unicode',
|
||||||
|
proname => 'unicode_assigned', prorettype => 'bool', proargtypes => 'text',
|
||||||
|
prosrc => 'unicode_assigned' },
|
||||||
|
|
||||||
{ oid => '4350', descr => 'Unicode normalization',
|
{ oid => '4350', descr => 'Unicode normalization',
|
||||||
proname => 'normalize', prorettype => 'text', proargtypes => 'text text',
|
proname => 'normalize', prorettype => 'text', proargtypes => 'text text',
|
||||||
prosrc => 'unicode_normalize_func' },
|
prosrc => 'unicode_normalize_func' },
|
||||||
|
68
src/include/common/unicode_category.h
Normal file
68
src/include/common/unicode_category.h
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* unicode_category.h
|
||||||
|
* Routines for determining the category of Unicode characters.
|
||||||
|
*
|
||||||
|
* These definitions can be used by both frontend and backend code.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2017-2023, PostgreSQL Global Development Group
|
||||||
|
*
|
||||||
|
* src/include/common/unicode_category.h
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
#ifndef UNICODE_CATEGORY_H
|
||||||
|
#define UNICODE_CATEGORY_H
|
||||||
|
|
||||||
|
#include "mb/pg_wchar.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Unicode General Category Values
|
||||||
|
*
|
||||||
|
* See: https://www.unicode.org/reports/tr44/#General_Category_Values
|
||||||
|
*
|
||||||
|
* The Unicode stability policy guarantees: "The enumeration of
|
||||||
|
* General_Category property values is fixed. No new values will be
|
||||||
|
* added". See: https://www.unicode.org/policies/stability_policy.html
|
||||||
|
*
|
||||||
|
* Numeric values chosen to match corresponding ICU UCharCategory.
|
||||||
|
*/
|
||||||
|
typedef enum pg_unicode_category
|
||||||
|
{
|
||||||
|
PG_U_UNASSIGNED = 0, /* Cn */
|
||||||
|
PG_U_UPPERCASE_LETTER = 1, /* Lu */
|
||||||
|
PG_U_LOWERCASE_LETTER = 2, /* Ll */
|
||||||
|
PG_U_TITLECASE_LETTER = 3, /* Lt */
|
||||||
|
PG_U_MODIFIER_LETTER = 4, /* Lm */
|
||||||
|
PG_U_OTHER_LETTER = 5, /* Lo */
|
||||||
|
PG_U_NONSPACING_MARK = 6, /* Mn */
|
||||||
|
PG_U_ENCLOSING_MARK = 7, /* Me */
|
||||||
|
PG_U_SPACING_MARK = 8, /* Mc */
|
||||||
|
PG_U_DECIMAL_NUMBER = 9, /* Nd */
|
||||||
|
PG_U_LETTER_NUMBER = 10, /* Nl */
|
||||||
|
PG_U_OTHER_NUMBER = 11, /* No */
|
||||||
|
PG_U_SPACE_SEPARATOR = 12, /* Zs */
|
||||||
|
PG_U_LINE_SEPARATOR = 13, /* Zl */
|
||||||
|
PG_U_PARAGRAPH_SEPARATOR = 14, /* Zp */
|
||||||
|
PG_U_CONTROL = 15, /* Cc */
|
||||||
|
PG_U_FORMAT = 16, /* Cf */
|
||||||
|
PG_U_PRIVATE_USE = 17, /* Co */
|
||||||
|
PG_U_SURROGATE = 18, /* Cs */
|
||||||
|
PG_U_DASH_PUNCTUATION = 19, /* Pd */
|
||||||
|
PG_U_OPEN_PUNCTUATION = 20, /* Ps */
|
||||||
|
PG_U_CLOSE_PUNCTUATION = 21, /* Pe */
|
||||||
|
PG_U_CONNECTOR_PUNCTUATION = 22, /* Pc */
|
||||||
|
PG_U_OTHER_PUNCTUATION = 23, /* Po */
|
||||||
|
PG_U_MATH_SYMBOL = 24, /* Sm */
|
||||||
|
PG_U_CURRENCY_SYMBOL = 25, /* Sc */
|
||||||
|
PG_U_MODIFIER_SYMBOL = 26, /* Sk */
|
||||||
|
PG_U_OTHER_SYMBOL = 27, /* So */
|
||||||
|
PG_U_INITIAL_PUNCTUATION = 28, /* Pi */
|
||||||
|
PG_U_FINAL_PUNCTUATION = 29 /* Pf */
|
||||||
|
} pg_unicode_category;
|
||||||
|
|
||||||
|
/*
 * Category lookup and naming routines, implemented in
 * src/common/unicode_category.c.
 */
extern pg_unicode_category unicode_category(pg_wchar ucs);
extern const char *unicode_category_string(pg_unicode_category category);
extern const char *unicode_category_abbrev(pg_unicode_category category);
|
||||||
|
|
||||||
|
#endif /* UNICODE_CATEGORY_H */
|
4040
src/include/common/unicode_category_table.h
Normal file
4040
src/include/common/unicode_category_table.h
Normal file
File diff suppressed because it is too large
Load Diff
14
src/include/common/unicode_version.h
Normal file
14
src/include/common/unicode_version.h
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
/*-------------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* unicode_version.h
|
||||||
|
* Unicode version used by Postgres.
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
|
||||||
|
* Portions Copyright (c) 1994, Regents of the University of California
|
||||||
|
*
|
||||||
|
* src/include/common/unicode_version.h
|
||||||
|
*
|
||||||
|
*-------------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define PG_UNICODE_VERSION "15.1"
|
@ -240,6 +240,16 @@ struct config_real
|
|||||||
void *reset_extra;
|
void *reset_extra;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A note about string GUCs: the boot_val is allowed to be NULL, which leads
|
||||||
|
* to the reset_val and the actual variable value (*variable) also being NULL.
|
||||||
|
* However, there is no way to set a NULL value subsequently using
|
||||||
|
* set_config_option or any other GUC API. Also, GUC APIs such as SHOW will
|
||||||
|
* display a NULL value as an empty string. Callers that choose to use a NULL
|
||||||
|
* boot_val should overwrite the setting later in startup, or else be careful
|
||||||
|
* that NULL doesn't have semantics that are visibly different from an empty
|
||||||
|
* string.
|
||||||
|
*/
|
||||||
struct config_string
|
struct config_string
|
||||||
{
|
{
|
||||||
struct config_generic gen;
|
struct config_generic gen;
|
||||||
|
@ -27,6 +27,10 @@ CREATE TABLE icu (def text, en text COLLATE "en-x-icu", upfirst text COLLATE upp
|
|||||||
INSERT INTO icu VALUES ('a', 'a', 'a'), ('b', 'b', 'b'), ('A', 'A', 'A'), ('B', 'B', 'B');
|
INSERT INTO icu VALUES ('a', 'a', 'a'), ('b', 'b', 'b'), ('A', 'A', 'A'), ('B', 'B', 'B');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
is( $node1->safe_psql('dbicu', q{SELECT icu_unicode_version() IS NOT NULL}),
|
||||||
|
qq(t),
|
||||||
|
'ICU unicode version defined');
|
||||||
|
|
||||||
is( $node1->safe_psql('dbicu', q{SELECT def FROM icu ORDER BY def}),
|
is( $node1->safe_psql('dbicu', q{SELECT def FROM icu ORDER BY def}),
|
||||||
qq(A
|
qq(A
|
||||||
a
|
a
|
||||||
|
@ -8,6 +8,24 @@ SELECT U&'\0061\0308bc' <> U&'\00E4bc' COLLATE "C" AS sanity_check;
|
|||||||
t
|
t
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SELECT unicode_version() IS NOT NULL;
|
||||||
|
?column?
|
||||||
|
----------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT unicode_assigned(U&'abc');
|
||||||
|
unicode_assigned
|
||||||
|
------------------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT unicode_assigned(U&'abc\+10FFFF');
|
||||||
|
unicode_assigned
|
||||||
|
------------------
|
||||||
|
f
|
||||||
|
(1 row)
|
||||||
|
|
||||||
SELECT normalize('');
|
SELECT normalize('');
|
||||||
normalize
|
normalize
|
||||||
-----------
|
-----------
|
||||||
|
@ -5,6 +5,10 @@ SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
|
|||||||
|
|
||||||
SELECT U&'\0061\0308bc' <> U&'\00E4bc' COLLATE "C" AS sanity_check;
|
SELECT U&'\0061\0308bc' <> U&'\00E4bc' COLLATE "C" AS sanity_check;
|
||||||
|
|
||||||
|
SELECT unicode_version() IS NOT NULL;
|
||||||
|
SELECT unicode_assigned(U&'abc');
|
||||||
|
SELECT unicode_assigned(U&'abc\+10FFFF');
|
||||||
|
|
||||||
SELECT normalize('');
|
SELECT normalize('');
|
||||||
SELECT normalize(U&'\0061\0308\24D1c') = U&'\00E4\24D1c' COLLATE "C" AS test_default;
|
SELECT normalize(U&'\0061\0308\24D1c') = U&'\00E4\24D1c' COLLATE "C" AS test_default;
|
||||||
SELECT normalize(U&'\0061\0308\24D1c', NFC) = U&'\00E4\24D1c' COLLATE "C" AS test_nfc;
|
SELECT normalize(U&'\0061\0308\24D1c', NFC) = U&'\00E4\24D1c' COLLATE "C" AS test_nfc;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user