diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index f0cb3cdb674..f9f67ed818d 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -1,5 +1,5 @@ @@ -880,115 +880,234 @@ as with a PGresult returned by libpq - Escaping strings for inclusion in SQL queries + Escaping Strings for Inclusion in SQL Commands + PQescapeStringConn + PQescapeString escaping strings -PQescapeString - Escapes a string for use within an SQL query. +PQescapeStringConn escapes a string for use within an SQL +command. This is useful when inserting data values as literal constants +in SQL commands. Certain characters (such as quotes and backslashes) must +be escaped to prevent them from being interpreted specially by the SQL parser. +PQescapeStringConn performs this operation. + + + + +It is especially important to do proper escaping when handling strings that +were received from an untrustworthy source. Otherwise there is a security +risk: you are vulnerable to SQL injection attacks wherein unwanted +SQL commands are fed to your database. + + + + + +size_t PQescapeStringConn (PGconn *conn, + char *to, const char *from, size_t length, + int *error); + + + + +PQescapeStringConn writes an escaped +version of the from string to the to +buffer, escaping special characters so that they cannot cause any +harm, and adding a terminating zero byte. The single quotes that +must surround PostgreSQL string literals are not +included in the result string; they should be provided in the SQL +command that the result is inserted into. +The parameter from points to the first character of the string +that is to be escaped, and the length parameter gives the +number of bytes in this string. A terminating zero byte is not +required, and should not be counted in length. (If +a terminating zero byte is found before length bytes are +processed, PQescapeStringConn stops at the zero; the behavior +is thus rather like strncpy.) +to shall point to a +buffer that is able to hold at least one more byte than twice +the value of length, otherwise the behavior is +undefined. +Behavior is likewise undefined if the to and from +strings overlap. + + +If the error parameter is not NULL, then *error +is set to zero on success, nonzero on error. Presently the only possible +error conditions involve invalid multibyte encoding in the source string. +The output string is still generated on error, but it can be expected that +the server will reject it as malformed. On error, a suitable message is +stored in the conn object, whether or not error +is NULL. + + +PQescapeStringConn returns the number of bytes written +to to, not including the terminating zero byte. + + + size_t PQescapeString (char *to, const char *from, size_t length); -If you want to include strings that have been received -from a source that is not trustworthy (for example, because a random user -entered them), you cannot directly include them in SQL -queries for security reasons. Instead, you have to quote special -characters that are otherwise interpreted by the SQL parser. + + + +PQescapeString is an older, deprecated version of +PQescapeStringConn; the difference is that it does not +take conn or error parameters. Because of this, +it cannot adjust its behavior depending on the connection properties (such as +character encoding) and therefore it may give the wrong results. +Also, it has no way to report error conditions. -PQescapeString performs this operation. The -from points to the first character of the string that -is to be escaped, and the length parameter counts the -number of characters in this string (a terminating zero byte is -neither necessary nor counted). to shall point to a -buffer that is able to hold at least one more character than twice -the value of length, otherwise the behavior is -undefined. A call to PQescapeString writes an escaped -version of the from string to the to -buffer, replacing special characters so that they cannot cause any -harm, and adding a terminating zero byte. The single quotes that -must surround PostgreSQL string literals are not part of the result -string. - - -PQescapeString returns the number of characters written -to to, not including the terminating zero byte. -Behavior is undefined when the to and from -strings overlap. +PQescapeString can be used safely in single-threaded client +programs that work with only one PostgreSQL connection at +a time (in this case it can find out what it needs to know behind the +scenes). In other contexts it is a security hazard and should be avoided +in favor of PQescapeStringConn. - Escaping binary strings for inclusion in SQL queries - - escaping binary strings - - - PQescapeBytea - Escapes a binary string (bytea type) for use within an SQL query. - - unsigned char *PQescapeBytea(unsigned char *from, - size_t from_length, - size_t *to_length); - + Escaping Binary Strings for Inclusion in SQL Commands - Certain ASCII characters must - be escaped (but all characters may be escaped) - when used as part of a bytea - string literal in an SQL statement. In general, to - escape a character, it is converted into the three digit octal number - equal to the decimal ASCII value, and preceded by - two backslashes. The single quote (') and backslash (\) characters have - special alternate escape sequences. See the User's Guide - for more information. PQescapeBytea - performs this operation, escaping only the minimally - required characters. + + bytea + in libpq + + + + + PQescapeByteaConnPQescapeByteaConn + + + Escapes binary data for use within an SQL command with the type + bytea. + +unsigned char *PQescapeByteaConn(PGconn *conn, + const unsigned char *from, + size_t from_length, + size_t *to_length); + + + + + Certain byte values must be escaped (but all + byte values can be escaped) when used as part + of a bytea literal in an SQL + statement. In general, to escape a byte, it is converted into the + three digit octal number equal to the octet value, and preceded by + one or two backslashes. The single quote (') and backslash + (\) characters have special alternative escape + sequences. PQescapeByteaConn performs this + operation, escaping only the minimally required bytes. The from parameter points to the first - character of the string that is to be escaped, and the - from_length parameter reflects the number of - characters in this binary string (a terminating zero byte is - neither necessary nor counted). The to_length - parameter shall point to a buffer suitable to hold the resultant - escaped string length. The result string length includes the terminating + byte of the string that is to be escaped, and the + from_length parameter gives the number of + bytes in this binary string. (A terminating zero byte is + neither necessary nor counted.) The to_length + parameter points to a variable that will hold the resultant + escaped string length. This result string length includes the terminating zero byte of the result. - PQescapeBytea returns an escaped version of the - from parameter binary string, to a caller-provided - buffer. The return string has all special characters replaced - so that they can be properly processed by the PostgreSQL string literal - parser, and the bytea input function. A terminating zero - byte is also added. The single quotes that must surround - PostgreSQL string literals are not part of the result string. + PQescapeByteaConn returns an escaped version of the + from parameter binary string in memory + allocated with malloc(). This memory must be freed using + free() when the result is no longer needed. The + return string has all special characters replaced so that they can + be properly processed by the PostgreSQL + string literal parser, and the bytea input function. A + terminating zero byte is also added. The single quotes that must + surround PostgreSQL string literals are + not part of the result string. - PQunescapeBytea - Converts an escaped string representation of binary data into binary - data - the reverse of PQescapeBytea. - - unsigned char *PQunescapeBytea(unsigned char *from, size_t *to_length); - - - The from parameter points to an escaped string - such as might be returned by PQgetvalue of a - BYTEA column. PQunescapeBytea converts - this string representation into its binary representation, filling the supplied buffer. - It returns a pointer to the buffer which is NULL on error, and the size - of the buffer in to_length. The pointer may - subsequently be used as an argument to the function - free(3). + On error, a NULL pointer is returned, and a suitable error message + is stored in the conn object. Currently, the only + possible error is insufficient memory for the result string. + + + + + PQescapeByteaPQescapeBytea + + + PQescapeBytea is an older, deprecated version of + PQescapeByteaConn. + +unsigned char *PQescapeBytea(unsigned char *from, + size_t from_length, + size_t *to_length); + + + + + The only difference from PQescapeByteaConn is that + PQescapeBytea does not + take a PGconn parameter. Because of this, it cannot adjust + its behavior depending on the connection properties + and therefore it may give the wrong results. Also, it + has no way to return an error message on failure. + + + + PQescapeBytea can be used safely in single-threaded client + programs that work with only one PostgreSQL connection at + a time (in this case it can find out what it needs to know behind the + scenes). In other contexts it is a security hazard and should be + avoided in favor of PQescapeByteaConn. + + + + + + PQunescapeByteaPQunescapeBytea + + + Converts a string representation of binary data into binary + data --- the reverse of PQescapeBytea. + This is needed when retrieving bytea data in text format, + but not when retrieving it in binary format. + + +unsigned char *PQunescapeBytea(unsigned char *from, size_t *to_length); + + + + + The from parameter points to a string + such as might be returned by PQgetvalue when applied + to a bytea column. PQunescapeBytea + converts this string representation into its binary representation. + It returns a pointer to a buffer allocated with + malloc(), or null on error, and puts the size of + the buffer in to_length. The result must be + freed using free() when it is no longer needed. + + + + This conversion is not exactly the inverse of + PQescapeBytea, because the string is not expected + to be escaped when received from PQgetvalue. + In particular this means there is no need for string quoting considerations, + and so no need for a PGconn parameter. + + + + - Retrieving SELECT Result Information diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index 0bdb55e1eae..318dc0168af 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-connect.c,v 1.213.2.5 2005/07/14 14:07:50 tgl Exp $ + * $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-connect.c,v 1.213.2.6 2006/05/21 20:20:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1642,6 +1642,7 @@ PQsetenvPoll(PGconn *conn) goto error_return; } conn->client_encoding = encoding; + PQ_static_client_encoding = conn->client_encoding; /* Move on to setting the environment options */ conn->setenv_state = SETENV_STATE_OPTION_SEND; @@ -1668,6 +1669,7 @@ PQsetenvPoll(PGconn *conn) conn->client_encoding = PG_SQL_ASCII; else conn->client_encoding = pg_char_to_encoding(encoding); + PQ_static_client_encoding = conn->client_encoding; } else if (PQresultStatus(res) != PGRES_COMMAND_OK) { @@ -1857,6 +1859,7 @@ makeEmptyPGconn(void) conn->status = CONNECTION_BAD; conn->asyncStatus = PGASYNC_IDLE; conn->setenv_state = SETENV_STATE_IDLE; + conn->client_encoding = PG_SQL_ASCII; conn->notifyList = DLNewList(); conn->sock = -1; #ifdef USE_SSL @@ -2763,6 +2766,7 @@ PQsetClientEncoding(PGconn *conn, const char *encoding) { /* change libpq internal encoding */ conn->client_encoding = pg_char_to_encoding(encoding); + PQ_static_client_encoding = conn->client_encoding; status = 0; /* everything is ok */ } PQclear(res); diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index b5dc8de0a87..071c102d65d 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-exec.c,v 1.122.2.2 2006/05/21 19:56:41 momjian Exp $ + * $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-exec.c,v 1.122.2.3 2006/05/21 20:20:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,6 +20,7 @@ #include "libpq-fe.h" #include "libpq-int.h" +#include "mb/pg_wchar.h" #ifdef WIN32 #include "win32.h" @@ -39,6 +40,12 @@ char *const pgresStatus[] = { "PGRES_FATAL_ERROR" }; +/* + * static state needed by PQescapeString and PQescapeBytea; initialize to + * values that result in backward-compatible behavior + */ +int PQ_static_client_encoding = PG_SQL_ASCII; +static bool static_std_strings = false; /* Note: DONOTICE macro will work if applied to either PGconn or PGresult */ #define DONOTICE(conn,message) \ @@ -59,45 +66,80 @@ static int getNotice(PGconn *conn); /* --------------- * Escaping arbitrary strings to get valid SQL strings/identifiers. * - * Replaces "\\" with "\\\\" and "'" with "''". + * Replaces "'" with "''", and if not std_strings, replaces "\" with "\\". * length is the length of the buffer pointed to by * from. The buffer at to must be at least 2*length + 1 characters * long. A terminating NUL character is written. * --------------- */ - -size_t -PQescapeString(char *to, const char *from, size_t length) +static size_t +PQescapeStringInternal(PGconn *conn, + char *to, const char *from, size_t length, + int *error, + int encoding, bool std_strings) { const char *source = from; char *target = to; - unsigned int remaining = length; + size_t remaining = length; - while (remaining > 0) + if (error) + *error = 0; + + while (remaining > 0 && *source != '\0') { - switch (*source) + char c = *source; + int len; + int i; + + /* Fast path for plain ASCII */ + if (!IS_HIGHBIT_SET(c)) { - case '\\': - *target = '\\'; - target++; - *target = '\\'; - /* target and remaining are updated below. */ - break; - - case '\'': - *target = '\''; - target++; - *target = '\''; - /* target and remaining are updated below. */ - break; - - default: - *target = *source; - /* target and remaining are updated below. */ + /* Apply quoting if needed */ + if (c == '\'' || + (c == '\\' && !std_strings)) + *target++ = c; + /* Copy the character */ + *target++ = c; + source++; + remaining--; + continue; + } + + /* Slow path for possible multibyte characters */ + len = pg_encoding_mblen(encoding, source); + + /* Copy the character */ + for (i = 0; i < len; i++) + { + if (remaining == 0 || *source == '\0') + break; + *target++ = *source++; + remaining--; + } + + /* + * If we hit premature end of string (ie, incomplete multibyte + * character), try to pad out to the correct length with spaces. + * We may not be able to pad completely, but we will always be able + * to insert at least one pad space (since we'd not have quoted a + * multibyte character). This should be enough to make a string that + * the server will error out on. + */ + if (i < len) + { + if (error) + *error = 1; + if (conn) + printfPQExpBuffer(&conn->errorMessage, + libpq_gettext("incomplete multibyte character\n")); + for (; i < len; i++) + { + if (((size_t) (target - to)) / 2 >= length) + break; + *target++ = ' '; + } + break; } - source++; - target++; - remaining--; } /* Write the terminating NUL character. */ @@ -106,72 +148,109 @@ PQescapeString(char *to, const char *from, size_t length) return target - to; } +size_t +PQescapeStringConn(PGconn *conn, + char *to, const char *from, size_t length, + int *error) +{ + if (!conn) + { + /* force empty-string result */ + *to = '\0'; + if (error) + *error = 1; + return 0; + } + return PQescapeStringInternal(conn, to, from, length, error, + conn->client_encoding, + static_std_strings); +} + +size_t +PQescapeString(char *to, const char *from, size_t length) +{ + return PQescapeStringInternal(NULL, to, from, length, NULL, + PQ_static_client_encoding, + static_std_strings); +} + /* * PQescapeBytea - converts from binary string to the * minimal encoding necessary to include the string in an SQL * INSERT statement with a bytea type column as the target. * * The following transformations are applied - * '\0' == ASCII 0 == \\000 - * '\'' == ASCII 39 == \' - * '\\' == ASCII 92 == \\\\ - * anything < 0x20, or > 0x7e ---> \\ooo - * (where ooo is an octal expression) + * '\0' == ASCII 0 == \000 + * '\'' == ASCII 39 == '' + * '\\' == ASCII 92 == \\ + * anything < 0x20, or > 0x7e ---> \ooo + * (where ooo is an octal expression) + * If not std_strings, all backslashes sent to the output are doubled. */ -unsigned char * -PQescapeBytea(unsigned char *bintext, size_t binlen, size_t *bytealen) +static unsigned char * +PQescapeByteaInternal(PGconn *conn, + const unsigned char *from, size_t from_length, + size_t *to_length, bool std_strings) { - unsigned char *vp; + const unsigned char *vp; unsigned char *rp; unsigned char *result; size_t i; size_t len; + size_t bslash_len = (std_strings ? 1 : 2); /* * empty string has 1 char ('\0') */ len = 1; - vp = bintext; - for (i = binlen; i > 0; i--, vp++) + vp = from; + for (i = from_length; i > 0; i--, vp++) { if (*vp < 0x20 || *vp > 0x7e) - len += 5; /* '5' is for '\\ooo' */ + len += bslash_len + 3; else if (*vp == '\'') len += 2; else if (*vp == '\\') - len += 4; + len += bslash_len + bslash_len; else len++; } + *to_length = len; rp = result = (unsigned char *) malloc(len); if (rp == NULL) + { + if (conn) + printfPQExpBuffer(&conn->errorMessage, + libpq_gettext("out of memory\n")); return NULL; + } - vp = bintext; - *bytealen = len; - - for (i = binlen; i > 0; i--, vp++) + vp = from; + for (i = from_length; i > 0; i--, vp++) { if (*vp < 0x20 || *vp > 0x7e) { - (void) sprintf(rp, "\\\\%03o", *vp); - rp += 5; + if (!std_strings) + *rp++ = '\\'; + (void) sprintf((char *) rp, "\\%03o", *vp); + rp += 4; } else if (*vp == '\'') { - rp[0] = '\''; - rp[1] = '\''; - rp += 2; + *rp++ = '\''; + *rp++ = '\''; } else if (*vp == '\\') { - rp[0] = '\\'; - rp[1] = '\\'; - rp[2] = '\\'; - rp[3] = '\\'; - rp += 4; + if (!std_strings) + { + *rp++ = '\\'; + *rp++ = '\\'; + } + *rp++ = '\\'; + *rp++ = '\\'; } else *rp++ = *vp; @@ -181,12 +260,31 @@ PQescapeBytea(unsigned char *bintext, size_t binlen, size_t *bytealen) return result; } +unsigned char * +PQescapeByteaConn(PGconn *conn, + const unsigned char *from, size_t from_length, + size_t *to_length) +{ + if (!conn) + return NULL; + return PQescapeByteaInternal(conn, from, from_length, to_length, + static_std_strings); +} + +unsigned char * +PQescapeBytea(unsigned char *from, size_t from_length, size_t *to_length) +{ + return PQescapeByteaInternal(NULL, from, from_length, to_length, + static_std_strings); +} + + /* * PQunescapeBytea - converts the null terminated string representation * of a bytea, strtext, into binary, filling a buffer. It returns a * pointer to the buffer which is NULL on error, and the size of the * buffer in retbuflen. The pointer may subsequently be used as an - * argument to the function free(3). It is the reverse of PQescapeBytea. + * argument to the function PQfreemem. * * The following transformations are reversed: * '\0' == ASCII 0 == \000 diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h index 49045d37b9d..188dbf579c9 100644 --- a/src/interfaces/libpq/libpq-fe.h +++ b/src/interfaces/libpq/libpq-fe.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: libpq-fe.h,v 1.86 2002/09/04 20:31:47 momjian Exp $ + * $Id: libpq-fe.h,v 1.86.2.1 2006/05/21 20:20:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -248,12 +248,18 @@ extern PQnoticeProcessor PQsetNoticeProcessor(PGconn *conn, /* === in fe-exec.c === */ /* Quoting strings before inclusion in queries. */ -extern size_t PQescapeString(char *to, const char *from, size_t length); -extern unsigned char *PQescapeBytea(unsigned char *bintext, size_t binlen, - size_t *bytealen); +extern size_t PQescapeStringConn(PGconn *conn, + char *to, const char *from, size_t length, + int *error); +extern unsigned char *PQescapeByteaConn(PGconn *conn, + const unsigned char *from, size_t from_length, + size_t *to_length); extern unsigned char *PQunescapeBytea(unsigned char *strtext, size_t *retbuflen); - +/* These forms are deprecated! */ +extern size_t PQescapeString(char *to, const char *from, size_t length); +extern unsigned char *PQescapeBytea(unsigned char *from, size_t from_length, + size_t *to_length); /* Simple synchronous query */ extern PGresult *PQexec(PGconn *conn, const char *query); diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index cde2b2d2b05..1a8360c751c 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -12,7 +12,7 @@ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: libpq-int.h,v 1.60 2002/10/16 02:55:30 momjian Exp $ + * $Id: libpq-int.h,v 1.60.2.1 2006/05/21 20:20:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -301,6 +301,9 @@ struct pg_conn */ extern char *const pgresStatus[]; +/* Added for PQescapeString fix */ +extern int PQ_static_client_encoding; + /* ---------------- * Internal functions of libpq * Functions declared here need to be visible across files of libpq, diff --git a/src/interfaces/libpq/libpqdll.def b/src/interfaces/libpq/libpqdll.def index 524a7468e8a..78269f89fa4 100644 --- a/src/interfaces/libpq/libpqdll.def +++ b/src/interfaces/libpq/libpqdll.def @@ -93,3 +93,5 @@ EXPORTS appendPQExpBuffer @ 91 pg_encoding_to_char @ 92 pg_utf_mblen @ 93 + PQescapeStringConn @ 126 + PQescapeByteaConn @ 127