diff --git a/doc/src/sgml/ref/create_operator.sgml b/doc/src/sgml/ref/create_operator.sgml
index 7f06c4cece3..36d791d2a79 100644
--- a/doc/src/sgml/ref/create_operator.sgml
+++ b/doc/src/sgml/ref/create_operator.sgml
@@ -1,5 +1,5 @@
@@ -60,8 +60,8 @@ CREATE OPERATOR name ( PROCEDURE = type1
- The type for the left-hand side of the operator, if any. This option would be
- omitted for a right-unary operator.
+ The type of the left-hand argument of the operator, if any.
+ This option would be omitted for a left-unary operator.
@@ -69,8 +69,8 @@ CREATE OPERATOR name ( PROCEDURE = type2
- The type for the right-hand side of the operator, if any. This option would be
- omitted for a left-unary operator.
+ The type of the right-hand argument of the operator, if any.
+ This option would be omitted for a right-unary operator.
@@ -78,7 +78,7 @@ CREATE OPERATOR name ( PROCEDURE = com_op
- The commutator for this operator.
+ The commutator of this operator.
@@ -110,7 +110,7 @@ CREATE OPERATOR name ( PROCEDURE = HASHES
-Indicates this operator can support a hash-join algorithm.
+ Indicates this operator can support a hash join.
@@ -118,7 +118,8 @@ Indicates this operator can support a hash-join algorithm.
left_sort_op
- Operator that sorts the left-hand data type of this operator.
+ If this operator can support a merge join, the
+ operator that sorts the left-hand data type of this operator.
@@ -126,7 +127,8 @@ Indicates this operator can support a hash-join algorithm.
right_sort_op
- Operator that sorts the right-hand data type of this operator.
+ If this operator can support a merge join, the
+ operator that sorts the right-hand data type of this operator.
@@ -172,22 +174,56 @@ CREATE
The operator name
- is a sequence of up to thirty two (32) characters in any combination
- from the following:
+ is a sequence of up to NAMEDATALEN-1 (31 by default) characters
+ from the following list:
-+ - * / < > = ~ ! @ # % ^ & | ` ? $ :
++ - * / < > = ~ ! @ # % ^ & | ` ? $ :
+
+ There are a few restrictions on your choice of name:
+
+
+
+ "$" and ":" cannot be defined as single-character operators,
+ although they can be part of a multi-character operator name.
+
+
+
+
+ "--" and "/*" cannot appear anywhere in an operator name,
+ since they will be taken as the start of a comment.
+
+
+
+
+ A multi-character operator name cannot end in "+" or "-",
+ unless the name also contains at least one of these characters:
+
+~ ! @ # % ^ & | ` ? $ :
+
+ For example, @- is an allowed operator name,
+ but *- is not.
+ This restriction allows Postgres to
+ parse SQL-compliant queries without requiring spaces between tokens.
+
+
+
+
- No alphabetic characters are allowed in an operator name.
- This enables Postgres to parse SQL input
- into tokens without requiring spaces between each token.
+ When working with non-SQL-standard operator names, you will usually
+ need to separate adjacent operators with spaces to avoid ambiguity.
+ For example, if you have defined a left-unary operator named "@",
+ you cannot write X*@Y; you must write
+ X* @Y to ensure that
+ Postgres reads it as two operator names,
+ not one.
- The operator "!=" is mapped to "<>" on input, so they are
- therefore equivalent.
+ The operator "!=" is mapped to "<>" on input, so these two names
+ are always equivalent.
At least one of LEFTARG and RIGHTARG must be defined. For
@@ -196,11 +232,11 @@ CREATE
unary operators only RIGHTARG should be defined.
- Also, the
+ The
func_name procedure must have
been previously defined using CREATE FUNCTION and must
be defined to accept the correct number of arguments
- (either one or two).
+ (either one or two) of the indicated types.
The commutator operator should be identified if one exists,
@@ -247,8 +283,6 @@ MYBOXES.description !== "0,0,1,1"::box
does not yet have a commutator itself, then the commutator's
entry is updated to have the newly created operator as its
commutator. This applies to the negator, as well.
-
-
This is to allow the definition of two operators that are
the commutators or the negators of each other. The first
operator should be defined without a commutator or negator
@@ -258,7 +292,7 @@ MYBOXES.description !== "0,0,1,1"::box
it also works to just have both operators refer to each other.)
- The next three specifications are present to support the
+ The HASHES, SORT1, and SORT2 options are present to support the
query optimizer in performing joins.
Postgres can always
evaluate a join (i.e., processing a clause with two tuple
@@ -294,9 +328,8 @@ MYBOXES.description !== "0,0,1,1"::box
be worth the complexity involved.
- The last two pieces of the specification are present so
- the query optimizer can estimate result sizes. If a
- clause of the form:
+ The RESTRICT and JOIN options assist the query optimizer in estimating
+ result sizes. If a clause of the form:
MYBOXES.description <<< "0,0,1,1"::box
@@ -310,7 +343,7 @@ MYBOXES.description <<< "0,0,1,1"::box
data types and returns a floating point number. The
query optimizer simply calls this function, passing the
parameter "0,0,1,1" and multiplies the result by the relation
- size to get the desired expected number of instances.
+ size to get the expected number of instances.
Similarly, when the operands of the operator both contain
@@ -318,7 +351,7 @@ MYBOXES.description <<< "0,0,1,1"::box
size of the resulting join. The function join_proc will
return another floating point number which will be multiplied
by the cardinalities of the two classes involved to
- compute the desired expected result size.
+ compute the expected result size.
The difference between the function
diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml
index 332464429cd..918d91a05cf 100644
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -315,12 +315,11 @@ UNCOMMITTED UNNAMED
A comment
- is an arbitrary sequence of characters following double dashes up to the end
- of the line. We also support double-slashes as comments, e.g.:
+ is an arbitrary sequence of characters beginning with double dashes
+ and extending to the end of the line, e.g.:
-- This is a standard SQL comment
-// And this is another supported comment style, like C++
We also support C-style block comments, e.g.:
@@ -331,6 +330,9 @@ We also support C-style block comments, e.g.:
comment
*/
+
+A comment beginning with "/*" extends to the first occurrence of "*/".
+
@@ -340,17 +342,22 @@ We also support C-style block comments, e.g.:
Names in SQL are sequences of less than NAMEDATALEN alphanumeric characters,
starting with an alphabetic character. By default, NAMEDATALEN is set
- to 32, but at the time the system is built, NAMEDATALEN can be changed
+ to 32 (but at the time the system is built, NAMEDATALEN can be changed
by changing the #define in
- src/backend/include/postgres.h.
+ src/backend/include/postgres.h).
Underscore ("_") is considered an alphabetic character.
- In some contexts, names may contain other characters if surrounded
- by double quotes. For example, table or column names may contain otherwise
- disallowed characters such as spaces, ampersands, etc. using this
- technique.
+ Names containing other characters may be formed by enclosing the name
+ in double quotes. For example, table or column names may contain
+ otherwise disallowed characters such as spaces, ampersands, etc. if
+ quoted. Quoting a name also makes it case-sensitive,
+ whereas unquoted names are always folded to lower case. For example,
+ the names FOO, foo
+ and "foo" are
+ considered the same by Postgres, but
+ "Foo" is a different name.
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index f972d6ead17..64a389b7680 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.67 2000/03/13 01:52:06 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.68 2000/03/18 18:03:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -87,10 +87,10 @@ static void addlit(char *ytext, int yleng);
* and to eliminate parsing troubles for numeric strings.
* Exclusive states:
* binary numeric string - thomas 1997-11-16
- * extended C-style comments - tgl 1997-07-12
- * delimited identifiers (double-quoted identifiers) - tgl 1997-10-27
+ * extended C-style comments - thomas 1997-07-12
+ * delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
* hexadecimal numeric string - thomas 1997-11-16
- * quoted strings - tgl 1997-07-30
+ * quoted strings - thomas 1997-07-30
*/
%x xb
@@ -144,7 +144,7 @@ xdinside [^"]+
* have something like plus-slash-star, lex will think this is a 3-character
* operator whereas we want to see it as a + operator and a comment start.
* The solution is two-fold:
- * 1. append {op_and_self}* to xcstart so that it matches as much text as
+ * 1. append {op_chars}* to xcstart so that it matches as much text as
* {operator} would. Then the tie-breaker (first matching rule of same
* length) ensures xcstart wins. We put back the extra stuff with yyless()
* in case it contains a star-slash that should terminate the comment.
@@ -154,7 +154,7 @@ xdinside [^"]+
* SQL92-style comments, which start with dash-dash, have similar interactions
* with the operator rule.
*/
-xcstart \/\*{op_and_self}*
+xcstart \/\*{op_chars}*
xcstop \*+\/
xcinside ([^*]+)|(\*+[^/])
@@ -166,10 +166,19 @@ identifier {letter}{letter_or_digit}*
typecast "::"
-/* NB: if you change "self", fix the copy in the operator rule too! */
+/*
+ * "self" is the set of chars that should be returned as single-character
+ * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
+ * which can be one or more characters long (but if a single-char token
+ * appears in the "self" set, it is not to be returned as an Op). Note
+ * that the sets overlap, but each has some chars that are not in the other.
+ *
+ * If you change either set, adjust the character lists appearing in the
+ * rule for "operator"!
+ */
self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
-op_and_self [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
-operator {op_and_self}+
+op_chars [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
+operator {op_chars}+
/* we no longer allow unary minus in numbers.
* instead we pass it separately to parser. there it gets
@@ -202,7 +211,7 @@ horiz_space [ \t\f]
newline [\n\r]
non_newline [^\n\r]
-comment (("--"|"//"){non_newline}*)
+comment ("--"{non_newline}*)
whitespace ({space}|{comment})
@@ -220,7 +229,7 @@ other .
/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
* AT&T lex does not properly handle C-style comments in this second lex block.
- * So, put comments here. tgl - 1997-09-08
+ * So, put comments here. thomas - 1997-09-08
*
* Quoted strings must allow some special characters such as single-quote
* and newline.
@@ -329,23 +338,57 @@ other .
{self} { return yytext[0]; }
{operator} {
- /* Check for embedded slash-star or dash-dash */
- char *slashstar = strstr((char*)yytext, "/*");
- char *dashdash = strstr((char*)yytext, "--");
+ /*
+ * Check for embedded slash-star or dash-dash; those
+ * are comment starts, so operator must stop there.
+ * Note that slash-star or dash-dash at the first
+ * character will match a prior rule, not this one.
+ */
+ int nchars = yyleng;
+ char *slashstar = strstr((char*)yytext, "/*");
+ char *dashdash = strstr((char*)yytext, "--");
if (slashstar && dashdash)
{
+ /* if both appear, take the first one */
if (slashstar > dashdash)
slashstar = dashdash;
}
else if (!slashstar)
slashstar = dashdash;
-
if (slashstar)
+ nchars = slashstar - ((char*)yytext);
+
+ /*
+ * For SQL92 compatibility, '+' and '-' cannot be the
+ * last char of a multi-char operator unless the operator
+ * contains chars that are not in SQL92 operators.
+ * The idea is to lex '=-' as two operators, but not
+ * to forbid operator names like '?-' that could not be
+ * sequences of SQL92 operators.
+ */
+ while (nchars > 1 &&
+ (yytext[nchars-1] == '+' ||
+ yytext[nchars-1] == '-'))
{
- int nchars = slashstar - ((char*)yytext);
+ int ic;
+
+ for (ic = nchars-2; ic >= 0; ic--)
+ {
+ if (strchr("~!@#&`?$:%^|", yytext[ic]))
+ break;
+ }
+ if (ic >= 0)
+ break; /* found a char that makes it OK */
+ nchars--; /* else remove the +/-, and check again */
+ }
+
+ if (nchars < yyleng)
+ {
+ /* Strip the unwanted chars from the token */
yyless(nchars);
- /* If what we have left is only one char, and it's
+ /*
+ * If what we have left is only one char, and it's
* one of the characters matching "self", then
* return it as a character token the same way
* that the "self" rule would have.
@@ -355,8 +398,9 @@ other .
return yytext[0];
}
+ /* Convert "!=" operator to "<>" for compatibility */
if (strcmp((char*)yytext, "!=") == 0)
- yylval.str = pstrdup("<>"); /* compatibility */
+ yylval.str = pstrdup("<>");
else
yylval.str = pstrdup((char*)yytext);
return Op;
diff --git a/src/bin/psql/mainloop.c b/src/bin/psql/mainloop.c
index 4f71f3e4105..eadd50e94af 100644
--- a/src/bin/psql/mainloop.c
+++ b/src/bin/psql/mainloop.c
@@ -3,7 +3,7 @@
*
* Copyright 2000 by PostgreSQL Global Development Group
*
- * $Header: /cvsroot/pgsql/src/bin/psql/mainloop.c,v 1.25 2000/03/13 13:46:32 petere Exp $
+ * $Header: /cvsroot/pgsql/src/bin/psql/mainloop.c,v 1.26 2000/03/18 18:03:11 tgl Exp $
*/
#include "postgres.h"
#include "mainloop.h"
@@ -318,8 +318,7 @@ MainLoop(FILE *source)
}
/* single-line comment? truncate line */
- else if ((line[i] == '-' && line[i + thislen] == '-') ||
- (line[i] == '/' && line[i + thislen] == '/'))
+ else if (line[i] == '-' && line[i + thislen] == '-')
{
line[i] = '\0'; /* remove comment */
break;
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l
index bcc8e6430e2..992b293085b 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -12,7 +12,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.55 2000/03/18 05:44:21 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.56 2000/03/18 18:03:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -86,10 +86,10 @@ static struct _if_value {
* and to eliminate parsing troubles for numeric strings.
* Exclusive states:
* binary numeric string - thomas 1997-11-16
- * extended C-style comments - tgl 1997-07-12
- * delimited identifiers (double-quoted identifiers) - tgl 1997-10-27
+ * extended C-style comments - thomas 1997-07-12
+ * delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
* hexadecimal numeric string - thomas 1997-11-16
- * quoted strings - tgl 1997-07-30
+ * quoted strings - thomas 1997-07-30
*/
%x xb
@@ -146,14 +146,16 @@ xdcqdq \\\"
xdcother [^"]
xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
-/* C-Style Comments
+/* C-style comments
+ *
* The "extended comment" syntax closely resembles allowable operator syntax.
* The tricky part here is to get lex to recognize a string starting with
* slash-star as a comment, when interpreting it as an operator would produce
- * a longer match --- remember lex will prefer a longer match! Also, if we
- * have tor whereas we want to see it as a + operator and a comment start.
+ * a longer match --- remember lex will prefer a longer match! Also, if we
+ * have something like plus-slash-star, lex will think this is a 3-character
+ * operator whereas we want to see it as a + operator and a comment start.
* The solution is two-fold:
- * 1. append {op_and_self}* to xcstart so that it matches as much text as
+ * 1. append {op_chars}* to xcstart so that it matches as much text as
* {operator} would. Then the tie-breaker (first matching rule of same
* length) ensures xcstart wins. We put back the extra stuff with yyless()
* in case it contains a star-slash that should terminate the comment.
@@ -163,22 +165,31 @@ xdcinside ({xdcqq}|{xdcqdq}|{xdcother})
* SQL92-style comments, which start with dash-dash, have similar interactions
* with the operator rule.
*/
-xcstart \/\*{op_and_self}*
+xcstart \/\*{op_chars}*
xcstop \*+\/
xcinside ([^*]+)|(\*+[^/])
digit [0-9]
letter [\200-\377_A-Za-z]
-letter_or_digit [\200-\377_A-Za-z0-9]
+letter_or_digit [\200-\377_A-Za-z0-9]
identifier {letter}{letter_or_digit}*
typecast "::"
-/* NB: if you change "self", fix the copy in the operator rule too! */
+/*
+ * "self" is the set of chars that should be returned as single-character
+ * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
+ * which can be one or more characters long (but if a single-char token
+ * appears in the "self" set, it is not to be returned as an Op). Note
+ * that the sets overlap, but each has some chars that are not in the other.
+ *
+ * If you change either set, adjust the character lists appearing in the
+ * rule for "operator"!
+ */
self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
-op_and_self [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
-operator {op_and_self}+
+op_chars [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
+operator {op_chars}+
/* we no longer allow unary minus in numbers.
* instead we pass it separately to parser. there it gets
@@ -215,7 +226,7 @@ horiz_space [ \t\f]
newline [\n\r]
non_newline [^\n\r]
-comment (("--"|"//"){non_newline}*)
+comment ("--"{non_newline}*)
whitespace ({space}|{comment})
@@ -250,7 +261,7 @@ cppline {space}*#(.*\\{line_end})*.*
/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
* AT&T lex does not properly handle C-style comments in this second lex block.
- * So, put comments here. tgl - 1997-09-08
+ * So, put comments here. thomas - 1997-09-08
*
* Quoted strings must allow some special characters such as single-quote
* and newline.
@@ -294,15 +305,16 @@ cppline {space}*#(.*\\{line_end})*.*
mmerror(ET_ERROR, "Bad binary integer input!");
return ICONST;
}
-<> { mmerror(ET_ERROR, "Unterminated binary integer"); }
{xhinside} |
{xbinside} {
addlit(yytext, yyleng);
}
{xhcat} |
-{xbcat} { /* ignore */
+{xbcat} {
+ /* ignore */
}
+<> { mmerror(ET_ERROR, "Unterminated binary integer"); }
{xhstart} {
BEGIN(xh);
@@ -367,23 +379,57 @@ cppline {space}*#(.*\\{line_end})*.*
return yytext[0];
}
{operator} {
- /* Check for embedded slash-star or dash-dash */
- char *slashstar = strstr((char*)yytext, "/*");
- char *dashdash = strstr((char*)yytext, "--");
+ /*
+ * Check for embedded slash-star or dash-dash; those
+ * are comment starts, so operator must stop there.
+ * Note that slash-star or dash-dash at the first
+ * character will match a prior rule, not this one.
+ */
+ int nchars = yyleng;
+ char *slashstar = strstr((char*)yytext, "/*");
+ char *dashdash = strstr((char*)yytext, "--");
if (slashstar && dashdash)
{
+ /* if both appear, take the first one */
if (slashstar > dashdash)
slashstar = dashdash;
}
else if (!slashstar)
slashstar = dashdash;
-
if (slashstar)
+ nchars = slashstar - ((char*)yytext);
+
+ /*
+ * For SQL92 compatibility, '+' and '-' cannot be the
+ * last char of a multi-char operator unless the operator
+ * contains chars that are not in SQL92 operators.
+ * The idea is to lex '=-' as two operators, but not
+ * to forbid operator names like '?-' that could not be
+ * sequences of SQL92 operators.
+ */
+ while (nchars > 1 &&
+ (yytext[nchars-1] == '+' ||
+ yytext[nchars-1] == '-'))
{
- int nchars = slashstar - ((char*)yytext);
+ int ic;
+
+ for (ic = nchars-2; ic >= 0; ic--)
+ {
+ if (strchr("~!@#&`?$:%^|", yytext[ic]))
+ break;
+ }
+ if (ic >= 0)
+ break; /* found a char that makes it OK */
+ nchars--; /* else remove the +/-, and check again */
+ }
+
+ if (nchars < yyleng)
+ {
+ /* Strip the unwanted chars from the token */
yyless(nchars);
- /* If what we have left is only one char, and it's
+ /*
+ * If what we have left is only one char, and it's
* one of the characters matching "self", then
* return it as a character token the same way
* that the "self" rule would have.
@@ -393,8 +439,9 @@ cppline {space}*#(.*\\{line_end})*.*
return yytext[0];
}
+ /* Convert "!=" operator to "<>" for compatibility */
if (strcmp((char*)yytext, "!=") == 0)
- yylval.str = mm_strdup("<>"); /* compatability */
+ yylval.str = mm_strdup("<>");
else
yylval.str = mm_strdup((char*)yytext);
return Op;