mirror of
https://github.com/postgres/postgres.git
synced 2025-12-11 00:05:53 -05:00
Make regex "max_chr" depend on encoding, not provider.
The regex mechanism scans through the first "max_chr" character values to cache character property ranges (isalpha, etc.). For single-byte encodings, there's no sense in scanning beyond UCHAR_MAX; but for UTF-8 it makes sense to cache higher code point values (though not all of them; only up to MAX_SIMPLE_CHR).

Prior to 5a38104b36, the logic about how many character values to scan was based on the pg_regex_strategy, which was dependent on the provider. Commit 5a38104b36 preserved that logic exactly, allowing different providers to define the "max_chr".

Now, change it to depend only on the encoding and whether ctype_is_c. For this specific calculation, distinguishing between providers creates more complexity than it's worth.

Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
This commit is contained in:
parent
99cd8890be
commit
19b966243c
@ -320,16 +320,18 @@ regc_ctype_get_cache(regc_wc_probefunc probefunc, int cclasscode)
|
|||||||
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
|
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
else if (GetDatabaseEncoding() == PG_UTF8)
|
||||||
|
{
|
||||||
|
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (pg_regex_locale->ctype->max_chr != 0 &&
|
#if MAX_SIMPLE_CHR >= UCHAR_MAX
|
||||||
pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR)
|
max_chr = (pg_wchar) UCHAR_MAX;
|
||||||
{
|
pcc->cv.cclasscode = -1;
|
||||||
max_chr = pg_regex_locale->ctype->max_chr;
|
#else
|
||||||
pcc->cv.cclasscode = -1;
|
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
|
||||||
}
|
#endif
|
||||||
else
|
|
||||||
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
@ -342,7 +342,6 @@ static const struct ctype_methods ctype_methods_libc_sb = {
|
|||||||
.char_tolower = char_tolower_libc,
|
.char_tolower = char_tolower_libc,
|
||||||
.wc_toupper = toupper_libc_sb,
|
.wc_toupper = toupper_libc_sb,
|
||||||
.wc_tolower = tolower_libc_sb,
|
.wc_tolower = tolower_libc_sb,
|
||||||
.max_chr = UCHAR_MAX,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -369,7 +368,6 @@ static const struct ctype_methods ctype_methods_libc_other_mb = {
|
|||||||
.char_tolower = char_tolower_libc,
|
.char_tolower = char_tolower_libc,
|
||||||
.wc_toupper = toupper_libc_sb,
|
.wc_toupper = toupper_libc_sb,
|
||||||
.wc_tolower = tolower_libc_sb,
|
.wc_tolower = tolower_libc_sb,
|
||||||
.max_chr = UCHAR_MAX,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct ctype_methods ctype_methods_libc_utf8 = {
|
static const struct ctype_methods ctype_methods_libc_utf8 = {
|
||||||
|
|||||||
@ -134,12 +134,6 @@ struct ctype_methods
|
|||||||
* pg_strlower().
|
* pg_strlower().
|
||||||
*/
|
*/
|
||||||
char (*char_tolower) (unsigned char ch, pg_locale_t locale);
|
char (*char_tolower) (unsigned char ch, pg_locale_t locale);
|
||||||
|
|
||||||
/*
|
|
||||||
* For regex and pattern matching efficiency, the maximum char value
|
|
||||||
* supported by the above methods. If zero, limit is set by regex code.
|
|
||||||
*/
|
|
||||||
pg_wchar max_chr;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user