mirror of
https://github.com/postgres/postgres.git
synced 2025-05-21 00:02:53 -04:00
Modify lexing of multi-char operators per pghackers discussion around
16-Mar-00: trailing + or - is not part of the operator unless the operator also contains characters not present in SQL92-defined operators. This solves the 'X=-Y' problem without unduly constraining users' choice of operator names --- in particular, no existing Postgres operator names become invalid. Also, remove processing of // comments, as agreed in the same thread.
This commit is contained in:
parent
2b23e86447
commit
f945f46193
@ -1,5 +1,5 @@
|
|||||||
<!--
|
<!--
|
||||||
$Header: /cvsroot/pgsql/doc/src/sgml/ref/create_operator.sgml,v 1.11 1999/07/22 15:09:08 thomas Exp $
|
$Header: /cvsroot/pgsql/doc/src/sgml/ref/create_operator.sgml,v 1.12 2000/03/18 18:03:12 tgl Exp $
|
||||||
Postgres documentation
|
Postgres documentation
|
||||||
-->
|
-->
|
||||||
|
|
||||||
@ -60,8 +60,8 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class
|
|||||||
<term><replaceable class="parameter">type1</replaceable></term>
|
<term><replaceable class="parameter">type1</replaceable></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
The type for the left-hand side of the operator, if any. This option would be
|
The type of the left-hand argument of the operator, if any.
|
||||||
omitted for a right-unary operator.
|
This option would be omitted for a left-unary operator.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
@ -69,8 +69,8 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class
|
|||||||
<term><replaceable class="parameter">type2</replaceable></term>
|
<term><replaceable class="parameter">type2</replaceable></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
The type for the right-hand side of the operator, if any. This option would be
|
The type of the right-hand argument of the operator, if any.
|
||||||
omitted for a left-unary operator.
|
This option would be omitted for a right-unary operator.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
@ -78,7 +78,7 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class
|
|||||||
<term><replaceable class="parameter">com_op</replaceable></term>
|
<term><replaceable class="parameter">com_op</replaceable></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
The commutator for this operator.
|
The commutator of this operator.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
@ -110,7 +110,7 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class
|
|||||||
<term>HASHES</term>
|
<term>HASHES</term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Indicates this operator can support a hash-join algorithm.
|
Indicates this operator can support a hash join.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
@ -118,7 +118,8 @@ Indicates this operator can support a hash-join algorithm.
|
|||||||
<term><replaceable class="parameter">left_sort_op</replaceable></term>
|
<term><replaceable class="parameter">left_sort_op</replaceable></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Operator that sorts the left-hand data type of this operator.
|
If this operator can support a merge join, the
|
||||||
|
operator that sorts the left-hand data type of this operator.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
@ -126,7 +127,8 @@ Indicates this operator can support a hash-join algorithm.
|
|||||||
<term><replaceable class="parameter">right_sort_op</replaceable></term>
|
<term><replaceable class="parameter">right_sort_op</replaceable></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Operator that sorts the right-hand data type of this operator.
|
If this operator can support a merge join, the
|
||||||
|
operator that sorts the right-hand data type of this operator.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
@ -172,22 +174,56 @@ CREATE
|
|||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
The operator <replaceable class="parameter">name</replaceable>
|
The operator <replaceable class="parameter">name</replaceable>
|
||||||
is a sequence of up to thirty two (32) characters in any combination
|
is a sequence of up to NAMEDATALEN-1 (31 by default) characters
|
||||||
from the following:
|
from the following list:
|
||||||
<literallayout>
|
<literallayout>
|
||||||
+ - * / < > = ~ ! @ # % ^ & | ` ? $ :
|
+ - * / < > = ~ ! @ # % ^ & | ` ? $ :
|
||||||
</literallayout>
|
</literallayout>
|
||||||
|
|
||||||
|
There are a few restrictions on your choice of name:
|
||||||
|
<itemizedlist>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
"$" and ":" cannot be defined as single-character operators,
|
||||||
|
although they can be part of a multi-character operator name.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
"--" and "/*" cannot appear anywhere in an operator name,
|
||||||
|
since they will be taken as the start of a comment.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
A multi-character operator name cannot end in "+" or "-",
|
||||||
|
unless the name also contains at least one of these characters:
|
||||||
|
<literallayout>
|
||||||
|
~ ! @ # % ^ & | ` ? $ :
|
||||||
|
</literallayout>
|
||||||
|
For example, <literal>@-</literal> is an allowed operator name,
|
||||||
|
but <literal>*-</literal> is not.
|
||||||
|
This restriction allows <productname>Postgres</productname> to
|
||||||
|
parse SQL-compliant queries without requiring spaces between tokens.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</itemizedlist>
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
No alphabetic characters are allowed in an operator name.
|
When working with non-SQL-standard operator names, you will usually
|
||||||
This enables <productname>Postgres</productname> to parse SQL input
|
need to separate adjacent operators with spaces to avoid ambiguity.
|
||||||
into tokens without requiring spaces between each token.
|
For example, if you have defined a left-unary operator named "@",
|
||||||
|
you cannot write <literal>X*@Y</literal>; you must write
|
||||||
|
<literal>X* @Y</literal> to ensure that
|
||||||
|
<productname>Postgres</productname> reads it as two operator names
|
||||||
|
rather than one.
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
The operator "!=" is mapped to "<>" on input, so they are
|
The operator "!=" is mapped to "<>" on input, so these two names
|
||||||
therefore equivalent.
|
are always equivalent.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
At least one of LEFTARG and RIGHTARG must be defined. For
|
At least one of LEFTARG and RIGHTARG must be defined. For
|
||||||
@ -196,11 +232,11 @@ CREATE
|
|||||||
unary operators only RIGHTARG should be defined.
|
unary operators only RIGHTARG should be defined.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
Also, the
|
The
|
||||||
<replaceable class="parameter">func_name</replaceable> procedure must have
|
<replaceable class="parameter">func_name</replaceable> procedure must have
|
||||||
been previously defined using <command>CREATE FUNCTION</command> and must
|
been previously defined using <command>CREATE FUNCTION</command> and must
|
||||||
be defined to accept the correct number of arguments
|
be defined to accept the correct number of arguments
|
||||||
(either one or two).
|
(either one or two) of the indicated types.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
The commutator operator should be identified if one exists,
|
The commutator operator should be identified if one exists,
|
||||||
@ -247,8 +283,6 @@ MYBOXES.description !== "0,0,1,1"::box
|
|||||||
does not yet have a commutator itself, then the commutator's
|
does not yet have a commutator itself, then the commutator's
|
||||||
entry is updated to have the newly created operator as its
|
entry is updated to have the newly created operator as its
|
||||||
commutator. This applies to the negator, as well.
|
commutator. This applies to the negator, as well.
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
This is to allow the definition of two operators that are
|
This is to allow the definition of two operators that are
|
||||||
the commutators or the negators of each other. The first
|
the commutators or the negators of each other. The first
|
||||||
operator should be defined without a commutator or negator
|
operator should be defined without a commutator or negator
|
||||||
@ -258,7 +292,7 @@ MYBOXES.description !== "0,0,1,1"::box
|
|||||||
it also works to just have both operators refer to each other.)
|
it also works to just have both operators refer to each other.)
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
The next three specifications are present to support the
|
The HASHES, SORT1, and SORT2 options are present to support the
|
||||||
query optimizer in performing joins.
|
query optimizer in performing joins.
|
||||||
<productname>Postgres</productname> can always
|
<productname>Postgres</productname> can always
|
||||||
evaluate a join (i.e., processing a clause with two tuple
|
evaluate a join (i.e., processing a clause with two tuple
|
||||||
@ -294,9 +328,8 @@ MYBOXES.description !== "0,0,1,1"::box
|
|||||||
be worth the complexity involved.
|
be worth the complexity involved.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
The last two pieces of the specification are present so
|
The RESTRICT and JOIN options assist the query optimizer in estimating
|
||||||
the query optimizer can estimate result sizes. If a
|
result sizes. If a clause of the form:
|
||||||
clause of the form:
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
MYBOXES.description <<< "0,0,1,1"::box
|
MYBOXES.description <<< "0,0,1,1"::box
|
||||||
</programlisting>
|
</programlisting>
|
||||||
@ -310,7 +343,7 @@ MYBOXES.description <<< "0,0,1,1"::box
|
|||||||
data types and returns a floating point number. The
|
data types and returns a floating point number. The
|
||||||
query optimizer simply calls this function, passing the
|
query optimizer simply calls this function, passing the
|
||||||
parameter "0,0,1,1" and multiplies the result by the relation
|
parameter "0,0,1,1" and multiplies the result by the relation
|
||||||
size to get the desired expected number of instances.
|
size to get the expected number of instances.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
Similarly, when the operands of the operator both contain
|
Similarly, when the operands of the operator both contain
|
||||||
@ -318,7 +351,7 @@ MYBOXES.description <<< "0,0,1,1"::box
|
|||||||
size of the resulting join. The function join_proc will
|
size of the resulting join. The function join_proc will
|
||||||
return another floating point number which will be multiplied
|
return another floating point number which will be multiplied
|
||||||
by the cardinalities of the two classes involved to
|
by the cardinalities of the two classes involved to
|
||||||
compute the desired expected result size.
|
compute the expected result size.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
The difference between the function
|
The difference between the function
|
||||||
|
@ -315,12 +315,11 @@ UNCOMMITTED UNNAMED
|
|||||||
|
|
||||||
<para>
|
<para>
|
||||||
A <firstterm>comment</firstterm>
|
A <firstterm>comment</firstterm>
|
||||||
is an arbitrary sequence of characters following double dashes up to the end
|
is an arbitrary sequence of characters beginning with double dashes
|
||||||
of the line. We also support double-slashes as comments, e.g.:
|
and extending to the end of the line, e.g.:
|
||||||
|
|
||||||
<programlisting>
|
<programlisting>
|
||||||
-- This is a standard SQL comment
|
-- This is a standard SQL comment
|
||||||
// And this is another supported comment style, like C++
|
|
||||||
</programlisting>
|
</programlisting>
|
||||||
|
|
||||||
We also support C-style block comments, e.g.:
|
We also support C-style block comments, e.g.:
|
||||||
@ -331,6 +330,9 @@ We also support C-style block comments, e.g.:
|
|||||||
comment
|
comment
|
||||||
*/
|
*/
|
||||||
</programlisting>
|
</programlisting>
|
||||||
|
|
||||||
|
A comment beginning with "/*" extends to the first occurrence of "*/".
|
||||||
|
|
||||||
</para>
|
</para>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
@ -340,17 +342,22 @@ We also support C-style block comments, e.g.:
|
|||||||
<para>
|
<para>
|
||||||
Names in SQL are sequences of fewer than NAMEDATALEN alphanumeric characters,
|
Names in SQL are sequences of fewer than NAMEDATALEN alphanumeric characters,
|
||||||
starting with an alphabetic character. By default, NAMEDATALEN is set
|
starting with an alphabetic character. By default, NAMEDATALEN is set
|
||||||
to 32, but at the time the system is built, NAMEDATALEN can be changed
|
to 32 (but at the time the system is built, NAMEDATALEN can be changed
|
||||||
by changing the <literal>#define</literal> in
|
by changing the <literal>#define</literal> in
|
||||||
src/backend/include/postgres.h.
|
src/backend/include/postgres.h).
|
||||||
Underscore ("_") is considered an alphabetic character.
|
Underscore ("_") is considered an alphabetic character.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
In some contexts, names may contain other characters if surrounded
|
Names containing other characters may be formed by surrounding them
|
||||||
by double quotes. For example, table or column names may contain otherwise
|
with double quotes. For example, table or column names may contain
|
||||||
disallowed characters such as spaces, ampersands, etc. using this
|
otherwise disallowed characters such as spaces, ampersands, etc. if
|
||||||
technique.
|
quoted. Quoting a name also makes it case-sensitive,
|
||||||
|
whereas unquoted names are always folded to lower case. For example,
|
||||||
|
the names <literal>FOO</literal>, <literal>foo</literal>
|
||||||
|
and <literal>"foo"</literal> are
|
||||||
|
considered the same by <productname>Postgres</productname>, but
|
||||||
|
<literal>"Foo"</literal> is a different name.
|
||||||
</para>
|
</para>
|
||||||
</sect1>
|
</sect1>
|
||||||
|
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.67 2000/03/13 01:52:06 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.68 2000/03/18 18:03:09 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -87,10 +87,10 @@ static void addlit(char *ytext, int yleng);
|
|||||||
* and to eliminate parsing troubles for numeric strings.
|
* and to eliminate parsing troubles for numeric strings.
|
||||||
* Exclusive states:
|
* Exclusive states:
|
||||||
* <xb> binary numeric string - thomas 1997-11-16
|
* <xb> binary numeric string - thomas 1997-11-16
|
||||||
* <xc> extended C-style comments - tgl 1997-07-12
|
* <xc> extended C-style comments - thomas 1997-07-12
|
||||||
* <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27
|
* <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
|
||||||
* <xh> hexadecimal numeric string - thomas 1997-11-16
|
* <xh> hexadecimal numeric string - thomas 1997-11-16
|
||||||
* <xq> quoted strings - tgl 1997-07-30
|
* <xq> quoted strings - thomas 1997-07-30
|
||||||
*/
|
*/
|
||||||
|
|
||||||
%x xb
|
%x xb
|
||||||
@ -144,7 +144,7 @@ xdinside [^"]+
|
|||||||
* have something like plus-slash-star, lex will think this is a 3-character
|
* have something like plus-slash-star, lex will think this is a 3-character
|
||||||
* operator whereas we want to see it as a + operator and a comment start.
|
* operator whereas we want to see it as a + operator and a comment start.
|
||||||
* The solution is two-fold:
|
* The solution is two-fold:
|
||||||
* 1. append {op_and_self}* to xcstart so that it matches as much text as
|
* 1. append {op_chars}* to xcstart so that it matches as much text as
|
||||||
* {operator} would. Then the tie-breaker (first matching rule of same
|
* {operator} would. Then the tie-breaker (first matching rule of same
|
||||||
* length) ensures xcstart wins. We put back the extra stuff with yyless()
|
* length) ensures xcstart wins. We put back the extra stuff with yyless()
|
||||||
* in case it contains a star-slash that should terminate the comment.
|
* in case it contains a star-slash that should terminate the comment.
|
||||||
@ -154,7 +154,7 @@ xdinside [^"]+
|
|||||||
* SQL92-style comments, which start with dash-dash, have similar interactions
|
* SQL92-style comments, which start with dash-dash, have similar interactions
|
||||||
* with the operator rule.
|
* with the operator rule.
|
||||||
*/
|
*/
|
||||||
xcstart \/\*{op_and_self}*
|
xcstart \/\*{op_chars}*
|
||||||
xcstop \*+\/
|
xcstop \*+\/
|
||||||
xcinside ([^*]+)|(\*+[^/])
|
xcinside ([^*]+)|(\*+[^/])
|
||||||
|
|
||||||
@ -166,10 +166,19 @@ identifier {letter}{letter_or_digit}*
|
|||||||
|
|
||||||
typecast "::"
|
typecast "::"
|
||||||
|
|
||||||
/* NB: if you change "self", fix the copy in the operator rule too! */
|
/*
|
||||||
|
* "self" is the set of chars that should be returned as single-character
|
||||||
|
* tokens. "op_chars" is the set of chars that can make up "Op" tokens,
|
||||||
|
* which can be one or more characters long (but if a single-char token
|
||||||
|
* appears in the "self" set, it is not to be returned as an Op). Note
|
||||||
|
* that the sets overlap, but each has some chars that are not in the other.
|
||||||
|
*
|
||||||
|
* If you change either set, adjust the character lists appearing in the
|
||||||
|
* rule for "operator"!
|
||||||
|
*/
|
||||||
self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
|
self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
|
||||||
op_and_self [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
|
op_chars [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
|
||||||
operator {op_and_self}+
|
operator {op_chars}+
|
||||||
|
|
||||||
/* we no longer allow unary minus in numbers.
|
/* we no longer allow unary minus in numbers.
|
||||||
* instead we pass it separately to parser. there it gets
|
* instead we pass it separately to parser. there it gets
|
||||||
@ -202,7 +211,7 @@ horiz_space [ \t\f]
|
|||||||
newline [\n\r]
|
newline [\n\r]
|
||||||
non_newline [^\n\r]
|
non_newline [^\n\r]
|
||||||
|
|
||||||
comment (("--"|"//"){non_newline}*)
|
comment ("--"{non_newline}*)
|
||||||
|
|
||||||
whitespace ({space}|{comment})
|
whitespace ({space}|{comment})
|
||||||
|
|
||||||
@ -220,7 +229,7 @@ other .
|
|||||||
|
|
||||||
/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
|
/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
|
||||||
* AT&T lex does not properly handle C-style comments in this second lex block.
|
* AT&T lex does not properly handle C-style comments in this second lex block.
|
||||||
* So, put comments here. tgl - 1997-09-08
|
* So, put comments here. thomas - 1997-09-08
|
||||||
*
|
*
|
||||||
* Quoted strings must allow some special characters such as single-quote
|
* Quoted strings must allow some special characters such as single-quote
|
||||||
* and newline.
|
* and newline.
|
||||||
@ -329,23 +338,57 @@ other .
|
|||||||
{self} { return yytext[0]; }
|
{self} { return yytext[0]; }
|
||||||
|
|
||||||
{operator} {
|
{operator} {
|
||||||
/* Check for embedded slash-star or dash-dash */
|
/*
|
||||||
char *slashstar = strstr((char*)yytext, "/*");
|
* Check for embedded slash-star or dash-dash; those
|
||||||
char *dashdash = strstr((char*)yytext, "--");
|
* are comment starts, so operator must stop there.
|
||||||
|
* Note that slash-star or dash-dash at the first
|
||||||
|
* character will match a prior rule, not this one.
|
||||||
|
*/
|
||||||
|
int nchars = yyleng;
|
||||||
|
char *slashstar = strstr((char*)yytext, "/*");
|
||||||
|
char *dashdash = strstr((char*)yytext, "--");
|
||||||
|
|
||||||
if (slashstar && dashdash)
|
if (slashstar && dashdash)
|
||||||
{
|
{
|
||||||
|
/* if both appear, take the first one */
|
||||||
if (slashstar > dashdash)
|
if (slashstar > dashdash)
|
||||||
slashstar = dashdash;
|
slashstar = dashdash;
|
||||||
}
|
}
|
||||||
else if (!slashstar)
|
else if (!slashstar)
|
||||||
slashstar = dashdash;
|
slashstar = dashdash;
|
||||||
|
|
||||||
if (slashstar)
|
if (slashstar)
|
||||||
|
nchars = slashstar - ((char*)yytext);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For SQL92 compatibility, '+' and '-' cannot be the
|
||||||
|
* last char of a multi-char operator unless the operator
|
||||||
|
* contains chars that are not in SQL92 operators.
|
||||||
|
* The idea is to lex '=-' as two operators, but not
|
||||||
|
* to forbid operator names like '?-' that could not be
|
||||||
|
* sequences of SQL92 operators.
|
||||||
|
*/
|
||||||
|
while (nchars > 1 &&
|
||||||
|
(yytext[nchars-1] == '+' ||
|
||||||
|
yytext[nchars-1] == '-'))
|
||||||
{
|
{
|
||||||
int nchars = slashstar - ((char*)yytext);
|
int ic;
|
||||||
|
|
||||||
|
for (ic = nchars-2; ic >= 0; ic--)
|
||||||
|
{
|
||||||
|
if (strchr("~!@#&`?$:%^|", yytext[ic]))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (ic >= 0)
|
||||||
|
break; /* found a char that makes it OK */
|
||||||
|
nchars--; /* else remove the +/-, and check again */
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nchars < yyleng)
|
||||||
|
{
|
||||||
|
/* Strip the unwanted chars from the token */
|
||||||
yyless(nchars);
|
yyless(nchars);
|
||||||
/* If what we have left is only one char, and it's
|
/*
|
||||||
|
* If what we have left is only one char, and it's
|
||||||
* one of the characters matching "self", then
|
* one of the characters matching "self", then
|
||||||
* return it as a character token the same way
|
* return it as a character token the same way
|
||||||
* that the "self" rule would have.
|
* that the "self" rule would have.
|
||||||
@ -355,8 +398,9 @@ other .
|
|||||||
return yytext[0];
|
return yytext[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Convert "!=" operator to "<>" for compatibility */
|
||||||
if (strcmp((char*)yytext, "!=") == 0)
|
if (strcmp((char*)yytext, "!=") == 0)
|
||||||
yylval.str = pstrdup("<>"); /* compatibility */
|
yylval.str = pstrdup("<>");
|
||||||
else
|
else
|
||||||
yylval.str = pstrdup((char*)yytext);
|
yylval.str = pstrdup((char*)yytext);
|
||||||
return Op;
|
return Op;
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* Copyright 2000 by PostgreSQL Global Development Group
|
* Copyright 2000 by PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* $Header: /cvsroot/pgsql/src/bin/psql/mainloop.c,v 1.25 2000/03/13 13:46:32 petere Exp $
|
* $Header: /cvsroot/pgsql/src/bin/psql/mainloop.c,v 1.26 2000/03/18 18:03:11 tgl Exp $
|
||||||
*/
|
*/
|
||||||
#include "postgres.h"
|
#include "postgres.h"
|
||||||
#include "mainloop.h"
|
#include "mainloop.h"
|
||||||
@ -318,8 +318,7 @@ MainLoop(FILE *source)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* single-line comment? truncate line */
|
/* single-line comment? truncate line */
|
||||||
else if ((line[i] == '-' && line[i + thislen] == '-') ||
|
else if (line[i] == '-' && line[i + thislen] == '-')
|
||||||
(line[i] == '/' && line[i + thislen] == '/'))
|
|
||||||
{
|
{
|
||||||
line[i] = '\0'; /* remove comment */
|
line[i] = '\0'; /* remove comment */
|
||||||
break;
|
break;
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.55 2000/03/18 05:44:21 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.56 2000/03/18 18:03:10 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -86,10 +86,10 @@ static struct _if_value {
|
|||||||
* and to eliminate parsing troubles for numeric strings.
|
* and to eliminate parsing troubles for numeric strings.
|
||||||
* Exclusive states:
|
* Exclusive states:
|
||||||
* <xb> binary numeric string - thomas 1997-11-16
|
* <xb> binary numeric string - thomas 1997-11-16
|
||||||
* <xc> extended C-style comments - tgl 1997-07-12
|
* <xc> extended C-style comments - thomas 1997-07-12
|
||||||
* <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27
|
* <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
|
||||||
* <xh> hexadecimal numeric string - thomas 1997-11-16
|
* <xh> hexadecimal numeric string - thomas 1997-11-16
|
||||||
* <xq> quoted strings - tgl 1997-07-30
|
* <xq> quoted strings - thomas 1997-07-30
|
||||||
*/
|
*/
|
||||||
|
|
||||||
%x xb
|
%x xb
|
||||||
@ -146,14 +146,16 @@ xdcqdq \\\"
|
|||||||
xdcother [^"]
|
xdcother [^"]
|
||||||
xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
|
xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
|
||||||
|
|
||||||
/* C-Style Comments
|
/* C-style comments
|
||||||
|
*
|
||||||
* The "extended comment" syntax closely resembles allowable operator syntax.
|
* The "extended comment" syntax closely resembles allowable operator syntax.
|
||||||
* The tricky part here is to get lex to recognize a string starting with
|
* The tricky part here is to get lex to recognize a string starting with
|
||||||
* slash-star as a comment, when interpreting it as an operator would produce
|
* slash-star as a comment, when interpreting it as an operator would produce
|
||||||
* a longer match --- remember lex will prefer a longer match! Also, if we
|
* a longer match --- remember lex will prefer a longer match! Also, if we
|
||||||
* have tor whereas we want to see it as a + operator and a comment start.
|
* have something like plus-slash-star, lex will think this is a 3-character
|
||||||
|
* operator whereas we want to see it as a + operator and a comment start.
|
||||||
* The solution is two-fold:
|
* The solution is two-fold:
|
||||||
* 1. append {op_and_self}* to xcstart so that it matches as much text as
|
* 1. append {op_chars}* to xcstart so that it matches as much text as
|
||||||
* {operator} would. Then the tie-breaker (first matching rule of same
|
* {operator} would. Then the tie-breaker (first matching rule of same
|
||||||
* length) ensures xcstart wins. We put back the extra stuff with yyless()
|
* length) ensures xcstart wins. We put back the extra stuff with yyless()
|
||||||
* in case it contains a star-slash that should terminate the comment.
|
* in case it contains a star-slash that should terminate the comment.
|
||||||
@ -163,22 +165,31 @@ xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
|
|||||||
* SQL92-style comments, which start with dash-dash, have similar interactions
|
* SQL92-style comments, which start with dash-dash, have similar interactions
|
||||||
* with the operator rule.
|
* with the operator rule.
|
||||||
*/
|
*/
|
||||||
xcstart \/\*{op_and_self}*
|
xcstart \/\*{op_chars}*
|
||||||
xcstop \*+\/
|
xcstop \*+\/
|
||||||
xcinside ([^*]+)|(\*+[^/])
|
xcinside ([^*]+)|(\*+[^/])
|
||||||
|
|
||||||
digit [0-9]
|
digit [0-9]
|
||||||
letter [\200-\377_A-Za-z]
|
letter [\200-\377_A-Za-z]
|
||||||
letter_or_digit [\200-\377_A-Za-z0-9]
|
letter_or_digit [\200-\377_A-Za-z0-9]
|
||||||
|
|
||||||
identifier {letter}{letter_or_digit}*
|
identifier {letter}{letter_or_digit}*
|
||||||
|
|
||||||
typecast "::"
|
typecast "::"
|
||||||
|
|
||||||
/* NB: if you change "self", fix the copy in the operator rule too! */
|
/*
|
||||||
|
* "self" is the set of chars that should be returned as single-character
|
||||||
|
* tokens. "op_chars" is the set of chars that can make up "Op" tokens,
|
||||||
|
* which can be one or more characters long (but if a single-char token
|
||||||
|
* appears in the "self" set, it is not to be returned as an Op). Note
|
||||||
|
* that the sets overlap, but each has some chars that are not in the other.
|
||||||
|
*
|
||||||
|
* If you change either set, adjust the character lists appearing in the
|
||||||
|
* rule for "operator"!
|
||||||
|
*/
|
||||||
self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
|
self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
|
||||||
op_and_self [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
|
op_chars [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
|
||||||
operator {op_and_self}+
|
operator {op_chars}+
|
||||||
|
|
||||||
/* we no longer allow unary minus in numbers.
|
/* we no longer allow unary minus in numbers.
|
||||||
* instead we pass it separately to parser. there it gets
|
* instead we pass it separately to parser. there it gets
|
||||||
@ -215,7 +226,7 @@ horiz_space [ \t\f]
|
|||||||
newline [\n\r]
|
newline [\n\r]
|
||||||
non_newline [^\n\r]
|
non_newline [^\n\r]
|
||||||
|
|
||||||
comment (("--"|"//"){non_newline}*)
|
comment ("--"{non_newline}*)
|
||||||
|
|
||||||
whitespace ({space}|{comment})
|
whitespace ({space}|{comment})
|
||||||
|
|
||||||
@ -250,7 +261,7 @@ cppline {space}*#(.*\\{line_end})*.*
|
|||||||
|
|
||||||
/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
|
/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
|
||||||
* AT&T lex does not properly handle C-style comments in this second lex block.
|
* AT&T lex does not properly handle C-style comments in this second lex block.
|
||||||
* So, put comments here. tgl - 1997-09-08
|
* So, put comments here. thomas - 1997-09-08
|
||||||
*
|
*
|
||||||
* Quoted strings must allow some special characters such as single-quote
|
* Quoted strings must allow some special characters such as single-quote
|
||||||
* and newline.
|
* and newline.
|
||||||
@ -294,15 +305,16 @@ cppline {space}*#(.*\\{line_end})*.*
|
|||||||
mmerror(ET_ERROR, "Bad binary integer input!");
|
mmerror(ET_ERROR, "Bad binary integer input!");
|
||||||
return ICONST;
|
return ICONST;
|
||||||
}
|
}
|
||||||
<xb><<EOF>> { mmerror(ET_ERROR, "Unterminated binary integer"); }
|
|
||||||
|
|
||||||
<xh>{xhinside} |
|
<xh>{xhinside} |
|
||||||
<xb>{xbinside} {
|
<xb>{xbinside} {
|
||||||
addlit(yytext, yyleng);
|
addlit(yytext, yyleng);
|
||||||
}
|
}
|
||||||
<xh>{xhcat} |
|
<xh>{xhcat} |
|
||||||
<xb>{xbcat} { /* ignore */
|
<xb>{xbcat} {
|
||||||
|
/* ignore */
|
||||||
}
|
}
|
||||||
|
<xb><<EOF>> { mmerror(ET_ERROR, "Unterminated binary integer"); }
|
||||||
|
|
||||||
<SQL>{xhstart} {
|
<SQL>{xhstart} {
|
||||||
BEGIN(xh);
|
BEGIN(xh);
|
||||||
@ -367,23 +379,57 @@ cppline {space}*#(.*\\{line_end})*.*
|
|||||||
return yytext[0];
|
return yytext[0];
|
||||||
}
|
}
|
||||||
<SQL>{operator} {
|
<SQL>{operator} {
|
||||||
/* Check for embedded slash-star or dash-dash */
|
/*
|
||||||
char *slashstar = strstr((char*)yytext, "/*");
|
* Check for embedded slash-star or dash-dash; those
|
||||||
char *dashdash = strstr((char*)yytext, "--");
|
* are comment starts, so operator must stop there.
|
||||||
|
* Note that slash-star or dash-dash at the first
|
||||||
|
* character will match a prior rule, not this one.
|
||||||
|
*/
|
||||||
|
int nchars = yyleng;
|
||||||
|
char *slashstar = strstr((char*)yytext, "/*");
|
||||||
|
char *dashdash = strstr((char*)yytext, "--");
|
||||||
|
|
||||||
if (slashstar && dashdash)
|
if (slashstar && dashdash)
|
||||||
{
|
{
|
||||||
|
/* if both appear, take the first one */
|
||||||
if (slashstar > dashdash)
|
if (slashstar > dashdash)
|
||||||
slashstar = dashdash;
|
slashstar = dashdash;
|
||||||
}
|
}
|
||||||
else if (!slashstar)
|
else if (!slashstar)
|
||||||
slashstar = dashdash;
|
slashstar = dashdash;
|
||||||
|
|
||||||
if (slashstar)
|
if (slashstar)
|
||||||
|
nchars = slashstar - ((char*)yytext);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For SQL92 compatibility, '+' and '-' cannot be the
|
||||||
|
* last char of a multi-char operator unless the operator
|
||||||
|
* contains chars that are not in SQL92 operators.
|
||||||
|
* The idea is to lex '=-' as two operators, but not
|
||||||
|
* to forbid operator names like '?-' that could not be
|
||||||
|
* sequences of SQL92 operators.
|
||||||
|
*/
|
||||||
|
while (nchars > 1 &&
|
||||||
|
(yytext[nchars-1] == '+' ||
|
||||||
|
yytext[nchars-1] == '-'))
|
||||||
{
|
{
|
||||||
int nchars = slashstar - ((char*)yytext);
|
int ic;
|
||||||
|
|
||||||
|
for (ic = nchars-2; ic >= 0; ic--)
|
||||||
|
{
|
||||||
|
if (strchr("~!@#&`?$:%^|", yytext[ic]))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (ic >= 0)
|
||||||
|
break; /* found a char that makes it OK */
|
||||||
|
nchars--; /* else remove the +/-, and check again */
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nchars < yyleng)
|
||||||
|
{
|
||||||
|
/* Strip the unwanted chars from the token */
|
||||||
yyless(nchars);
|
yyless(nchars);
|
||||||
/* If what we have left is only one char, and it's
|
/*
|
||||||
|
* If what we have left is only one char, and it's
|
||||||
* one of the characters matching "self", then
|
* one of the characters matching "self", then
|
||||||
* return it as a character token the same way
|
* return it as a character token the same way
|
||||||
* that the "self" rule would have.
|
* that the "self" rule would have.
|
||||||
@ -393,8 +439,9 @@ cppline {space}*#(.*\\{line_end})*.*
|
|||||||
return yytext[0];
|
return yytext[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Convert "!=" operator to "<>" for compatibility */
|
||||||
if (strcmp((char*)yytext, "!=") == 0)
|
if (strcmp((char*)yytext, "!=") == 0)
|
||||||
yylval.str = mm_strdup("<>"); /* compatability */
|
yylval.str = mm_strdup("<>");
|
||||||
else
|
else
|
||||||
yylval.str = mm_strdup((char*)yytext);
|
yylval.str = mm_strdup((char*)yytext);
|
||||||
return Op;
|
return Op;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user