mirror of
https://github.com/postgres/postgres.git
synced 2025-05-22 00:02:02 -04:00
Change the way UESCAPE is lexed, to reduce the size of the flex tables.
The error rule used to avoid backtracking with the U&'...' UESCAPE 'x' syntax bloated the flex tables, so refactor it. This patch shortens the error rule by introducing a new exclusive flex state that is entered after parsing U&'...'. This shrinks the postgres binary by about 220 kB.
This commit is contained in:
parent
59d0bf9dca
commit
a5ff502fce
@ -97,6 +97,7 @@ static bool is_utf16_surrogate_first(pg_wchar c);
|
|||||||
static bool is_utf16_surrogate_second(pg_wchar c);
|
static bool is_utf16_surrogate_second(pg_wchar c);
|
||||||
static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second);
|
static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second);
|
||||||
static void addunicode(pg_wchar c, yyscan_t yyscanner);
|
static void addunicode(pg_wchar c, yyscan_t yyscanner);
|
||||||
|
static bool check_uescapechar(unsigned char escape);
|
||||||
|
|
||||||
#define yyerror(msg) scanner_yyerror(msg, yyscanner)
|
#define yyerror(msg) scanner_yyerror(msg, yyscanner)
|
||||||
|
|
||||||
@ -150,7 +151,9 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
|
|||||||
* <xe> extended quoted strings (support backslash escape sequences)
|
* <xe> extended quoted strings (support backslash escape sequences)
|
||||||
* <xdolq> $foo$ quoted strings
|
* <xdolq> $foo$ quoted strings
|
||||||
* <xui> quoted identifier with Unicode escapes
|
* <xui> quoted identifier with Unicode escapes
|
||||||
|
* <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
|
||||||
* <xus> quoted string with Unicode escapes
|
* <xus> quoted string with Unicode escapes
|
||||||
|
* <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
|
||||||
* <xeu> Unicode surrogate pair in extended quoted string
|
* <xeu> Unicode surrogate pair in extended quoted string
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -162,7 +165,9 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
|
|||||||
%x xq
|
%x xq
|
||||||
%x xdolq
|
%x xdolq
|
||||||
%x xui
|
%x xui
|
||||||
|
%x xuiend
|
||||||
%x xus
|
%x xus
|
||||||
|
%x xusend
|
||||||
%x xeu
|
%x xeu
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -279,17 +284,17 @@ xdinside [^"]+
|
|||||||
/* Unicode escapes */
|
/* Unicode escapes */
|
||||||
uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
|
uescape [uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
|
||||||
/* error rule to avoid backup */
|
/* error rule to avoid backup */
|
||||||
uescapefail ("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])
|
uescapefail [uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU]
|
||||||
|
|
||||||
/* Quoted identifier with Unicode escapes */
|
/* Quoted identifier with Unicode escapes */
|
||||||
xuistart [uU]&{dquote}
|
xuistart [uU]&{dquote}
|
||||||
xuistop1 {dquote}{whitespace}*{uescapefail}?
|
|
||||||
xuistop2 {dquote}{whitespace}*{uescape}
|
|
||||||
|
|
||||||
/* Quoted string with Unicode escapes */
|
/* Quoted string with Unicode escapes */
|
||||||
xusstart [uU]&{quote}
|
xusstart [uU]&{quote}
|
||||||
xusstop1 {quote}{whitespace}*{uescapefail}?
|
|
||||||
xusstop2 {quote}{whitespace}*{uescape}
|
/* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
|
||||||
|
xustop1 {uescapefail}?
|
||||||
|
xustop2 {uescape}
|
||||||
|
|
||||||
/* error rule to avoid backup */
|
/* error rule to avoid backup */
|
||||||
xufailed [uU]&
|
xufailed [uU]&
|
||||||
@ -536,15 +541,31 @@ other .
|
|||||||
yylval->str = litbufdup(yyscanner);
|
yylval->str = litbufdup(yyscanner);
|
||||||
return SCONST;
|
return SCONST;
|
||||||
}
|
}
|
||||||
<xus>{xusstop1} {
|
<xus>{quotestop} |
|
||||||
|
<xus>{quotefail} {
|
||||||
/* throw back all but the quote */
|
/* throw back all but the quote */
|
||||||
yyless(1);
|
yyless(1);
|
||||||
|
/* handle possible UESCAPE in xusend mode */
|
||||||
|
BEGIN(xusend);
|
||||||
|
}
|
||||||
|
<xusend>{whitespace}
|
||||||
|
<xusend>{other} |
|
||||||
|
<xusend>{xustop1} {
|
||||||
|
/* no UESCAPE after the quote, throw back everything */
|
||||||
|
yyless(0);
|
||||||
BEGIN(INITIAL);
|
BEGIN(INITIAL);
|
||||||
yylval->str = litbuf_udeescape('\\', yyscanner);
|
yylval->str = litbuf_udeescape('\\', yyscanner);
|
||||||
return SCONST;
|
return SCONST;
|
||||||
}
|
}
|
||||||
<xus>{xusstop2} {
|
<xusend>{xustop2} {
|
||||||
|
/* found UESCAPE after the end quote */
|
||||||
BEGIN(INITIAL);
|
BEGIN(INITIAL);
|
||||||
|
if (!check_uescapechar(yytext[yyleng-2]))
|
||||||
|
{
|
||||||
|
SET_YYLLOC();
|
||||||
|
ADVANCE_YYLLOC(yyleng-2);
|
||||||
|
yyerror("invalid Unicode escape character");
|
||||||
|
}
|
||||||
yylval->str = litbuf_udeescape(yytext[yyleng-2], yyscanner);
|
yylval->str = litbuf_udeescape(yytext[yyleng-2], yyscanner);
|
||||||
return SCONST;
|
return SCONST;
|
||||||
}
|
}
|
||||||
@ -702,9 +723,19 @@ other .
|
|||||||
yylval->str = ident;
|
yylval->str = ident;
|
||||||
return IDENT;
|
return IDENT;
|
||||||
}
|
}
|
||||||
<xui>{xuistop1} {
|
<xui>{dquote} {
|
||||||
|
yyless(1);
|
||||||
|
/* handle possible UESCAPE in xuiend mode */
|
||||||
|
BEGIN(xuiend);
|
||||||
|
}
|
||||||
|
<xuiend>{whitespace} { }
|
||||||
|
<xuiend>{other} |
|
||||||
|
<xuiend>{xustop1} {
|
||||||
|
/* no UESCAPE after the quote, throw back everything */
|
||||||
char *ident;
|
char *ident;
|
||||||
|
|
||||||
|
yyless(0);
|
||||||
|
|
||||||
BEGIN(INITIAL);
|
BEGIN(INITIAL);
|
||||||
if (yyextra->literallen == 0)
|
if (yyextra->literallen == 0)
|
||||||
yyerror("zero-length delimited identifier");
|
yyerror("zero-length delimited identifier");
|
||||||
@ -712,16 +743,21 @@ other .
|
|||||||
if (yyextra->literallen >= NAMEDATALEN)
|
if (yyextra->literallen >= NAMEDATALEN)
|
||||||
truncate_identifier(ident, yyextra->literallen, true);
|
truncate_identifier(ident, yyextra->literallen, true);
|
||||||
yylval->str = ident;
|
yylval->str = ident;
|
||||||
/* throw back all but the quote */
|
|
||||||
yyless(1);
|
|
||||||
return IDENT;
|
return IDENT;
|
||||||
}
|
}
|
||||||
<xui>{xuistop2} {
|
<xuiend>{xustop2} {
|
||||||
|
/* found UESCAPE after the end quote */
|
||||||
char *ident;
|
char *ident;
|
||||||
|
|
||||||
BEGIN(INITIAL);
|
BEGIN(INITIAL);
|
||||||
if (yyextra->literallen == 0)
|
if (yyextra->literallen == 0)
|
||||||
yyerror("zero-length delimited identifier");
|
yyerror("zero-length delimited identifier");
|
||||||
|
if (!check_uescapechar(yytext[yyleng-2]))
|
||||||
|
{
|
||||||
|
SET_YYLLOC();
|
||||||
|
ADVANCE_YYLLOC(yyleng-2);
|
||||||
|
yyerror("invalid Unicode escape character");
|
||||||
|
}
|
||||||
ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner);
|
ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner);
|
||||||
if (yyextra->literallen >= NAMEDATALEN)
|
if (yyextra->literallen >= NAMEDATALEN)
|
||||||
truncate_identifier(ident, yyextra->literallen, true);
|
truncate_identifier(ident, yyextra->literallen, true);
|
||||||
@ -1203,22 +1239,29 @@ addunicode(pg_wchar c, core_yyscan_t yyscanner)
|
|||||||
addlit(buf, pg_mblen(buf), yyscanner);
|
addlit(buf, pg_mblen(buf), yyscanner);
|
||||||
}
|
}
|
||||||
|
|
||||||
static char *
|
/* Is 'escape' acceptable as a Unicode escape character (UESCAPE syntax)? */
|
||||||
litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
|
static bool
|
||||||
|
check_uescapechar(unsigned char escape)
|
||||||
{
|
{
|
||||||
char *new;
|
|
||||||
char *litbuf, *in, *out;
|
|
||||||
pg_wchar pair_first = 0;
|
|
||||||
|
|
||||||
if (isxdigit(escape)
|
if (isxdigit(escape)
|
||||||
|| escape == '+'
|
|| escape == '+'
|
||||||
|| escape == '\''
|
|| escape == '\''
|
||||||
|| escape == '"'
|
|| escape == '"'
|
||||||
|| scanner_isspace(escape))
|
|| scanner_isspace(escape))
|
||||||
{
|
{
|
||||||
ADVANCE_YYLLOC(yyextra->literallen + yyleng + 1);
|
return false;
|
||||||
yyerror("invalid Unicode escape character");
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* like litbufdup, but handle unicode escapes */
|
||||||
|
static char *
|
||||||
|
litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
|
||||||
|
{
|
||||||
|
char *new;
|
||||||
|
char *litbuf, *in, *out;
|
||||||
|
pg_wchar pair_first = 0;
|
||||||
|
|
||||||
/* Make literalbuf null-terminated to simplify the scanning loop */
|
/* Make literalbuf null-terminated to simplify the scanning loop */
|
||||||
litbuf = yyextra->literalbuf;
|
litbuf = yyextra->literalbuf;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user