diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index a86ba6089a4..2b34921e70b 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1,4 +1,4 @@ - + Server Configuration @@ -4060,6 +4060,23 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; + + bytea_output (enum) + + bytea_output configuration parameter + + + + Sets the output format for values of type bytea. + Valid values are hex (the default) + and escape (the traditional PostgreSQL + format). See for more + information. The bytea type always + accepts both formats on input, regardless of this setting. + + + + xmlbinary (enum) diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml index 774e4dbfb04..abe747a6964 100644 --- a/doc/src/sgml/datatype.sgml +++ b/doc/src/sgml/datatype.sgml @@ -1,4 +1,4 @@ - + Data Types @@ -1177,7 +1177,7 @@ SELECT b, char_length(b) FROM test2; A binary string is a sequence of octets (or bytes). Binary strings are distinguished from character strings in two - ways: First, binary strings specifically allow storing + ways. First, binary strings specifically allow storing octets of value zero and other non-printable octets (usually, octets outside the range 32 to 126). Character strings disallow zero octets, and also disallow any @@ -1191,13 +1191,82 @@ SELECT b, char_length(b) FROM test2; - When entering bytea values, octets of certain - values must be escaped (but all octet - values can be escaped) when used as part - of a string literal in an SQL statement. In + The bytea type supports two external formats for + input and output: PostgreSQL's historical + escape format, and hex format. Both + of these are always accepted on input. The output format depends + on the configuration parameter ; + the default is hex. (Note that the hex format was introduced in + PostgreSQL 8.5; earlier versions and some + tools don't understand it.) 
+ + + + The SQL standard defines a different binary + string type, called BLOB or BINARY LARGE + OBJECT. The input format is different from + bytea, but the provided functions and operators are + mostly the same. + + + + <type>bytea</> hex format + + + The hex format encodes binary data as 2 hexadecimal digits + per byte, most significant nibble first. The entire string is + preceded by the sequence \x (to distinguish it + from the escape format). In some contexts, the initial backslash may + need to be escaped by doubling it, in the same cases in which backslashes + have to be doubled in escape format; details appear below. + The hexadecimal digits can + be either upper or lower case, and whitespace is permitted between + digit pairs (but not within a digit pair nor in the starting + \x sequence). + The hex format is compatible with a wide + range of external applications and protocols, and it tends to be + faster to convert than the escape format, so its use is preferred. + + + + Example: + +SELECT E'\\xDEADBEEF'; + + + + + + <type>bytea</> escape format + + + The escape format is the traditional + PostgreSQL format for the bytea + type. It + takes the approach of representing a binary string as a sequence + of ASCII characters, while converting those bytes that cannot be + represented as an ASCII character into special escape sequences. + If, from the point of view of the application, representing bytes + as characters makes sense, then this representation can be + convenient. But in practice it is usually confusing because it + fuzzes up the distinction between binary strings and character + strings, and also the particular escape mechanism that was chosen is + somewhat unwieldy. So this format should probably be avoided + for most new applications. + + + + When entering bytea values in escape format, + octets of certain + values must be escaped, while all octet + values can be escaped. 
In general, to escape an octet, convert it into its three-digit octal value and precede it - by two backslashes. + by a backslash (or two backslashes, if writing the value as a + literal using escape string syntax). + Backslash itself (octet value 92) can alternatively be represented by + double backslashes. + shows the characters that must be escaped, and gives the alternative escape sequences where applicable. @@ -1343,14 +1412,7 @@ SELECT b, char_length(b) FROM test2; have to escape line feeds and carriage returns if your interface automatically translates these. - - - The SQL standard defines a different binary - string type, called BLOB or BINARY LARGE - OBJECT. The input format is different from - bytea, but the provided functions and operators are - mostly the same. - + diff --git a/src/backend/catalog/pg_largeobject.c b/src/backend/catalog/pg_largeobject.c index 925d21387b8..313ccdd3f07 100644 --- a/src/backend/catalog/pg_largeobject.c +++ b/src/backend/catalog/pg_largeobject.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/catalog/pg_largeobject.c,v 1.32 2009/01/01 17:23:37 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/catalog/pg_largeobject.c,v 1.33 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -18,7 +18,7 @@ #include "access/heapam.h" #include "catalog/indexing.h" #include "catalog/pg_largeobject.h" -#include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/fmgroids.h" #include "utils/rel.h" #include "utils/tqual.h" diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 7bc82127893..c1f55cfcf93 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.251 2009/07/30 02:45:36 tgl Exp $ + * $PostgreSQL: 
pgsql/src/backend/commands/trigger.c,v 1.252 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,6 +37,7 @@ #include "tcop/utility.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/fmgroids.h" #include "utils/inval.h" #include "utils/lsyscache.h" diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index b3f96eb773c..3930acf05a7 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.240 2009/06/11 14:48:58 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.241 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -31,6 +31,7 @@ #include "optimizer/restrictinfo.h" #include "optimizer/var.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" #include "utils/selfuncs.h" diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index 480b85cefcb..e581e3bc42d 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.23 2009/01/01 17:23:49 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.24 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -109,7 +109,7 @@ binary_decode(PG_FUNCTION_ARGS) * HEX */ -static const char *hextbl = "0123456789abcdef"; +static const char hextbl[] = "0123456789abcdef"; static const int8 hexlookup[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, @@ -122,7 +122,7 @@ static const int8 hexlookup[128] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, }; -static 
unsigned +unsigned hex_encode(const char *src, unsigned len, char *dst) { const char *end = src + len; @@ -136,7 +136,7 @@ hex_encode(const char *src, unsigned len, char *dst) return len * 2; } -static char +static inline char get_hex(char c) { int res = -1; @@ -152,7 +152,7 @@ get_hex(char c) return (char) res; } -static unsigned +unsigned hex_decode(const char *src, unsigned len, char *dst) { const char *s, diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 3d60885a795..e85ab06819c 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.261 2009/06/11 14:49:04 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.262 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -109,6 +109,7 @@ #include "parser/parse_coerce.h" #include "parser/parsetree.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/date.h" #include "utils/datum.h" #include "utils/fmgroids.h" diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index b9b54e6db63..c524454432e 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.171 2009/06/11 14:49:04 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.172 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -24,10 +24,14 @@ #include "parser/scansup.h" #include "regex/regex.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/lsyscache.h" #include "utils/pg_locale.h" +/* GUC variable */ +int bytea_output = BYTEA_OUTPUT_HEX; + typedef struct varlena unknown; typedef struct @@ -186,10 +190,24 @@ byteain(PG_FUNCTION_ARGS) char *inputText = 
PG_GETARG_CSTRING(0); char *tp; char *rp; - int byte; + int bc; bytea *result; - for (byte = 0, tp = inputText; *tp != '\0'; byte ++) + /* Recognize hex input */ + if (inputText[0] == '\\' && inputText[1] == 'x') + { + size_t len = strlen(inputText); + + bc = (len - 2)/2 + VARHDRSZ; /* maximum possible length */ + result = palloc(bc); + bc = hex_decode(inputText + 2, len - 2, VARDATA(result)); + SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ + + PG_RETURN_BYTEA_P(result); + } + + /* Else, it's the traditional escaped style */ + for (bc = 0, tp = inputText; *tp != '\0'; bc++) { if (tp[0] != '\\') tp++; @@ -204,7 +222,7 @@ byteain(PG_FUNCTION_ARGS) else { /* - * one backslash, not followed by 0 or ### valid octal + * one backslash, not followed by another or ### valid octal */ ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), @@ -212,10 +230,10 @@ byteain(PG_FUNCTION_ARGS) } } - byte +=VARHDRSZ; + bc += VARHDRSZ; - result = (bytea *) palloc(byte); - SET_VARSIZE(result, byte); + result = (bytea *) palloc(bc); + SET_VARSIZE(result, bc); tp = inputText; rp = VARDATA(result); @@ -228,11 +246,11 @@ byteain(PG_FUNCTION_ARGS) (tp[2] >= '0' && tp[2] <= '7') && (tp[3] >= '0' && tp[3] <= '7')) { - byte = VAL(tp[1]); - byte <<=3; - byte +=VAL(tp[2]); - byte <<=3; - *rp++ = byte +VAL(tp[3]); + bc = VAL(tp[1]); + bc <<= 3; + bc += VAL(tp[2]); + bc <<= 3; + *rp++ = bc + VAL(tp[3]); tp += 4; } @@ -259,21 +277,30 @@ byteain(PG_FUNCTION_ARGS) /* * byteaout - converts to printable representation of byte array * - * Non-printable characters are inserted as '\nnn' (octal) and '\' as - * '\\'. - * - * NULL vlena should be an error--returning string with NULL for now. + * In the traditional escaped format, non-printable characters are + * printed as '\nnn' (octal) and '\' as '\\'. 
*/ Datum byteaout(PG_FUNCTION_ARGS) { bytea *vlena = PG_GETARG_BYTEA_PP(0); char *result; - char *vp; char *rp; - int val; /* holds unprintable chars */ - int i; + + if (bytea_output == BYTEA_OUTPUT_HEX) + { + /* Print hex format */ + rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); + *rp++ = '\\'; + *rp++ = 'x'; + rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); + } + else if (bytea_output == BYTEA_OUTPUT_ESCAPE) + { + /* Print traditional escaped format */ + char *vp; int len; + int i; len = 1; /* empty string has 1 char */ vp = VARDATA_ANY(vlena); @@ -297,6 +324,8 @@ byteaout(PG_FUNCTION_ARGS) } else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) { + int val; /* holds unprintable chars */ + val = *vp; rp[0] = '\\'; rp[3] = DIG(val & 07); @@ -309,6 +338,13 @@ byteaout(PG_FUNCTION_ARGS) else *rp++ = *vp; } + } + else + { + elog(ERROR, "unrecognized bytea_output setting: %d", + bytea_output); + rp = result = NULL; /* keep compiler quiet */ + } *rp = '\0'; PG_RETURN_CSTRING(result); } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 76d3ec9da65..264b45451a9 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.509 2009/07/22 17:00:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.510 2009/08/04 16:08:36 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -61,6 +61,7 @@ #include "tcop/tcopprot.h" #include "tsearch/ts_cache.h" #include "utils/builtins.h" +#include "utils/bytea.h" #include "utils/guc_tables.h" #include "utils/memutils.h" #include "utils/pg_locale.h" @@ -180,6 +181,12 @@ static char *config_enum_get_options(struct config_enum * record, * NOTE! Option values may not contain double quotes! 
*/ +static const struct config_enum_entry bytea_output_options[] = { + {"escape", BYTEA_OUTPUT_ESCAPE, false}, + {"hex", BYTEA_OUTPUT_HEX, false}, + {NULL, 0, false} +}; + /* * We have different sets for client and server message level options because * they sort slightly different (see "log" level) @@ -2540,6 +2547,15 @@ static struct config_enum ConfigureNamesEnum[] = BACKSLASH_QUOTE_SAFE_ENCODING, backslash_quote_options, NULL, NULL }, + { + {"bytea_output", PGC_USERSET, CLIENT_CONN_STATEMENT, + gettext_noop("Sets the output format for bytea."), + NULL + }, + &bytea_output, + BYTEA_OUTPUT_HEX, bytea_output_options, NULL, NULL + }, + { {"client_min_messages", PGC_USERSET, LOGGING_WHEN, gettext_noop("Sets the message levels that are sent to the client."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index e50d7a44f7b..41488e264f0 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -424,6 +424,7 @@ #statement_timeout = 0 # in milliseconds, 0 is disabled #vacuum_freeze_min_age = 50000000 #vacuum_freeze_table_age = 150000000 +#bytea_output = 'hex' # hex, escape #xmlbinary = 'base64' #xmloption = 'content' diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 360eaf1caf7..b46c068d4ec 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -12,7 +12,7 @@ * by PostgreSQL * * IDENTIFICATION - * $PostgreSQL: pgsql/src/bin/pg_dump/pg_dump.c,v 1.544 2009/08/02 22:14:52 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/pg_dump/pg_dump.c,v 1.545 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -11008,6 +11008,8 @@ dumpTrigger(Archive *fout, TriggerInfo *tginfo) TableInfo *tbinfo = tginfo->tgtable; PQExpBuffer query; PQExpBuffer delqry; + char *tgargs; + size_t lentgargs; const char *p; int findx; @@ -11109,53 +11111,29 @@ dumpTrigger(Archive *fout, TriggerInfo 
*tginfo) appendPQExpBuffer(query, "EXECUTE PROCEDURE %s(", fmtId(tginfo->tgfname)); - p = tginfo->tgargs; + tgargs = (char *) PQunescapeBytea(tginfo->tgargs, &lentgargs); + p = tgargs; for (findx = 0; findx < tginfo->tgnargs; findx++) { - const char *s = p; + /* find the embedded null that terminates this trigger argument */ + size_t tlen = strlen(p); - /* Set 'p' to end of arg string. marked by '\000' */ - for (;;) + if (p + tlen >= tgargs + lentgargs) { - p = strchr(p, '\\'); - if (p == NULL) - { - write_msg(NULL, "invalid argument string (%s) for trigger \"%s\" on table \"%s\"\n", - tginfo->tgargs, - tginfo->dobj.name, - tbinfo->dobj.name); - exit_nicely(); - } - p++; - if (*p == '\\') /* is it '\\'? */ - { - p++; - continue; - } - if (p[0] == '0' && p[1] == '0' && p[2] == '0') /* is it '\000'? */ - break; + /* hm, not found before end of bytea value... */ + write_msg(NULL, "invalid argument string (%s) for trigger \"%s\" on table \"%s\"\n", + tginfo->tgargs, + tginfo->dobj.name, + tbinfo->dobj.name); + exit_nicely(); } - p--; - appendPQExpBufferChar(query, '\''); - while (s < p) - { - if (*s == '\'') - appendPQExpBufferChar(query, '\''); - - /* - * bytea unconditionally doubles backslashes, so we suppress the - * doubling for standard_conforming_strings. - */ - if (fout->std_strings && *s == '\\' && s[1] == '\\') - s++; - appendPQExpBufferChar(query, *s++); - } - appendPQExpBufferChar(query, '\''); - appendPQExpBuffer(query, - (findx < tginfo->tgnargs - 1) ? 
", " : ""); - p = p + 4; + if (findx > 0) + appendPQExpBuffer(query, ", "); + appendStringLiteralAH(query, p, fout); + p += tlen + 1; } + free(tgargs); appendPQExpBuffer(query, ");\n"); if (tginfo->tgenabled != 't' && tginfo->tgenabled != 'O') diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 4b92cbcb608..b664799fc47 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.337 2009/08/03 21:11:39 joe Exp $ + * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.338 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -138,6 +138,12 @@ extern Datum char_text(PG_FUNCTION_ARGS); extern Datum domain_in(PG_FUNCTION_ARGS); extern Datum domain_recv(PG_FUNCTION_ARGS); +/* encode.c */ +extern Datum binary_encode(PG_FUNCTION_ARGS); +extern Datum binary_decode(PG_FUNCTION_ARGS); +extern unsigned hex_encode(const char *src, unsigned len, char *dst); +extern unsigned hex_decode(const char *src, unsigned len, char *dst); + /* enum.c */ extern Datum enum_in(PG_FUNCTION_ARGS); extern Datum enum_out(PG_FUNCTION_ARGS); @@ -711,28 +717,6 @@ extern Datum unknownout(PG_FUNCTION_ARGS); extern Datum unknownrecv(PG_FUNCTION_ARGS); extern Datum unknownsend(PG_FUNCTION_ARGS); -extern Datum byteain(PG_FUNCTION_ARGS); -extern Datum byteaout(PG_FUNCTION_ARGS); -extern Datum bytearecv(PG_FUNCTION_ARGS); -extern Datum byteasend(PG_FUNCTION_ARGS); -extern Datum byteaoctetlen(PG_FUNCTION_ARGS); -extern Datum byteaGetByte(PG_FUNCTION_ARGS); -extern Datum byteaGetBit(PG_FUNCTION_ARGS); -extern Datum byteaSetByte(PG_FUNCTION_ARGS); -extern Datum byteaSetBit(PG_FUNCTION_ARGS); -extern Datum binary_encode(PG_FUNCTION_ARGS); -extern Datum binary_decode(PG_FUNCTION_ARGS); -extern 
Datum byteaeq(PG_FUNCTION_ARGS); -extern Datum byteane(PG_FUNCTION_ARGS); -extern Datum bytealt(PG_FUNCTION_ARGS); -extern Datum byteale(PG_FUNCTION_ARGS); -extern Datum byteagt(PG_FUNCTION_ARGS); -extern Datum byteage(PG_FUNCTION_ARGS); -extern Datum byteacmp(PG_FUNCTION_ARGS); -extern Datum byteacat(PG_FUNCTION_ARGS); -extern Datum byteapos(PG_FUNCTION_ARGS); -extern Datum bytea_substr(PG_FUNCTION_ARGS); -extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS); extern Datum pg_column_size(PG_FUNCTION_ARGS); /* version.c */ diff --git a/src/include/utils/bytea.h b/src/include/utils/bytea.h new file mode 100644 index 00000000000..8750d6d0e9c --- /dev/null +++ b/src/include/utils/bytea.h @@ -0,0 +1,50 @@ +/*------------------------------------------------------------------------- + * + * bytea.h + * Declarations for BYTEA data type support. + * + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/include/utils/bytea.h,v 1.1 2009/08/04 16:08:36 tgl Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef BYTEA_H +#define BYTEA_H + +#include "fmgr.h" + + +typedef enum +{ + BYTEA_OUTPUT_ESCAPE, + BYTEA_OUTPUT_HEX +} ByteaOutputType; + +extern int bytea_output; /* ByteaOutputType, but int for GUC enum */ + +/* functions are in utils/adt/varlena.c */ +extern Datum byteain(PG_FUNCTION_ARGS); +extern Datum byteaout(PG_FUNCTION_ARGS); +extern Datum bytearecv(PG_FUNCTION_ARGS); +extern Datum byteasend(PG_FUNCTION_ARGS); +extern Datum byteaoctetlen(PG_FUNCTION_ARGS); +extern Datum byteaGetByte(PG_FUNCTION_ARGS); +extern Datum byteaGetBit(PG_FUNCTION_ARGS); +extern Datum byteaSetByte(PG_FUNCTION_ARGS); +extern Datum byteaSetBit(PG_FUNCTION_ARGS); +extern Datum byteaeq(PG_FUNCTION_ARGS); +extern Datum byteane(PG_FUNCTION_ARGS); +extern Datum bytealt(PG_FUNCTION_ARGS); +extern Datum byteale(PG_FUNCTION_ARGS); 
+extern Datum byteagt(PG_FUNCTION_ARGS); +extern Datum byteage(PG_FUNCTION_ARGS); +extern Datum byteacmp(PG_FUNCTION_ARGS); +extern Datum byteacat(PG_FUNCTION_ARGS); +extern Datum byteapos(PG_FUNCTION_ARGS); +extern Datum bytea_substr(PG_FUNCTION_ARGS); +extern Datum bytea_substr_no_len(PG_FUNCTION_ARGS); + +#endif /* BYTEA_H */ diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index c00f5eae6ab..f1318a4a942 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/interfaces/libpq/fe-exec.c,v 1.203 2009/06/11 14:49:13 momjian Exp $ + * $PostgreSQL: pgsql/src/interfaces/libpq/fe-exec.c,v 1.204 2009/08/04 16:08:36 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3167,6 +3167,29 @@ PQescapeBytea(const unsigned char *from, size_t from_length, size_t *to_length) } +static const int8 hexlookup[128] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +}; + +static inline char +get_hex(char c) +{ + int res = -1; + + if (c > 0 && c < 127) + res = hexlookup[(unsigned char) c]; + + return (char) res; +} + + #define ISFIRSTOCTDIGIT(CH) ((CH) >= '0' && (CH) <= '3') #define ISOCTDIGIT(CH) ((CH) >= '0' && (CH) <= '7') #define OCTVAL(CH) ((CH) - '0') @@ -3198,6 +3221,40 @@ PQunescapeBytea(const unsigned char *strtext, size_t *retbuflen) strtextlen = strlen((const char *) strtext); + if (strtext[0] == '\\' && strtext[1] == 'x') + { + const unsigned char *s; + 
unsigned char *p; + + buflen = (strtextlen - 2)/2; + /* Avoid unportable malloc(0) */ + buffer = (unsigned char *) malloc(buflen > 0 ? buflen : 1); + if (buffer == NULL) + return NULL; + + s = strtext + 2; + p = buffer; + while (*s) + { + char v1, + v2; + + /* + * Bad input is silently ignored. Note that this includes + * whitespace between hex pairs, which is allowed by byteain. + */ + v1 = get_hex(*s++); + if (!*s || v1 == (char) -1) + continue; + v2 = get_hex(*s++); + if (v2 != (char) -1) + *p++ = (v1 << 4) | v2; + } + + buflen = p - buffer; + } + else + { /* * Length of input is max length of output, but add one to avoid * unportable malloc(0) if input is zero-length. @@ -3244,6 +3301,7 @@ PQunescapeBytea(const unsigned char *strtext, size_t *retbuflen) } } buflen = j; /* buflen is the length of the dequoted data */ + } /* Shrink the buffer to be no larger than necessary */ /* +1 avoids unportable behavior when buflen==0 */ diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out index e80e1a45343..82eca262f09 100644 --- a/src/test/regress/expected/conversion.out +++ b/src/test/regress/expected/conversion.out @@ -1,3 +1,5 @@ +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- -- create user defined conversion -- diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 1241a2ace63..392f48ef8c6 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -97,6 +97,99 @@ LINE 1: SELECT U&'wrong: +0061' UESCAPE '+'; ^ DETAIL: String constants with Unicode escapes cannot be used when standard_conforming_strings is off. 
RESET standard_conforming_strings; +-- bytea +SET bytea_output TO hex; +SELECT E'\\xDeAdBeEf'::bytea; + bytea +------------ + \xdeadbeef +(1 row) + +SELECT E'\\x De Ad Be Ef '::bytea; + bytea +------------ + \xdeadbeef +(1 row) + +SELECT E'\\xDeAdBeE'::bytea; +ERROR: invalid hexadecimal data: odd number of digits +LINE 1: SELECT E'\\xDeAdBeE'::bytea; + ^ +SELECT E'\\xDeAdBeEx'::bytea; +ERROR: invalid hexadecimal digit: "x" +LINE 1: SELECT E'\\xDeAdBeEx'::bytea; + ^ +SELECT E'\\xDe00BeEf'::bytea; + bytea +------------ + \xde00beef +(1 row) + +SELECT E'DeAdBeEf'::bytea; + bytea +-------------------- + \x4465416442654566 +(1 row) + +SELECT E'De\\000dBeEf'::bytea; + bytea +-------------------- + \x4465006442654566 +(1 row) + +SELECT E'De\123dBeEf'::bytea; + bytea +-------------------- + \x4465536442654566 +(1 row) + +SELECT E'De\\123dBeEf'::bytea; + bytea +-------------------- + \x4465536442654566 +(1 row) + +SELECT E'De\\678dBeEf'::bytea; +ERROR: invalid input syntax for type bytea +LINE 1: SELECT E'De\\678dBeEf'::bytea; + ^ +SET bytea_output TO escape; +SELECT E'\\xDeAdBeEf'::bytea; + bytea +------------------ + \336\255\276\357 +(1 row) + +SELECT E'\\x De Ad Be Ef '::bytea; + bytea +------------------ + \336\255\276\357 +(1 row) + +SELECT E'\\xDe00BeEf'::bytea; + bytea +------------------ + \336\000\276\357 +(1 row) + +SELECT E'DeAdBeEf'::bytea; + bytea +---------- + DeAdBeEf +(1 row) + +SELECT E'De\\000dBeEf'::bytea; + bytea +------------- + De\000dBeEf +(1 row) + +SELECT E'De\\123dBeEf'::bytea; + bytea +---------- + DeSdBeEf +(1 row) + -- -- test conversions between various string types -- E021-10 implicit casting among the character data types diff --git a/src/test/regress/input/largeobject.source b/src/test/regress/input/largeobject.source index 46ba9261ac5..807cfd7cc46 100644 --- a/src/test/regress/input/largeobject.source +++ b/src/test/regress/input/largeobject.source @@ -2,6 +2,9 @@ -- Test large object support -- +-- ensure consistent test output regardless 
of the default bytea format +SET bytea_output TO escape; + -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/output/largeobject.source b/src/test/regress/output/largeobject.source index 9d69f6c913e..d7468bb5131 100644 --- a/src/test/regress/output/largeobject.source +++ b/src/test/regress/output/largeobject.source @@ -1,6 +1,8 @@ -- -- Test large object support -- +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/output/largeobject_1.source b/src/test/regress/output/largeobject_1.source index 1fbc29c2517..84e916fea45 100644 --- a/src/test/regress/output/largeobject_1.source +++ b/src/test/regress/output/largeobject_1.source @@ -1,6 +1,8 @@ -- -- Test large object support -- +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; -- Load a file CREATE TABLE lotest_stash_values (loid oid, fd integer); -- lo_creat(mode integer) returns oid diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql index 99a9178315e..be194eec1f1 100644 --- a/src/test/regress/sql/conversion.sql +++ b/src/test/regress/sql/conversion.sql @@ -1,3 +1,6 @@ +-- ensure consistent test output regardless of the default bytea format +SET bytea_output TO escape; + -- -- create user defined conversion -- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 681a0e1e62c..63df9402ed7 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -43,6 +43,27 @@ SELECT U&'wrong: +0061' UESCAPE '+'; RESET standard_conforming_strings; +-- bytea +SET bytea_output TO hex; +SELECT E'\\xDeAdBeEf'::bytea; +SELECT E'\\x De Ad Be Ef '::bytea; +SELECT E'\\xDeAdBeE'::bytea; +SELECT E'\\xDeAdBeEx'::bytea; 
+SELECT E'\\xDe00BeEf'::bytea; +SELECT E'DeAdBeEf'::bytea; +SELECT E'De\\000dBeEf'::bytea; +SELECT E'De\123dBeEf'::bytea; +SELECT E'De\\123dBeEf'::bytea; +SELECT E'De\\678dBeEf'::bytea; + +SET bytea_output TO escape; +SELECT E'\\xDeAdBeEf'::bytea; +SELECT E'\\x De Ad Be Ef '::bytea; +SELECT E'\\xDe00BeEf'::bytea; +SELECT E'DeAdBeEf'::bytea; +SELECT E'De\\000dBeEf'::bytea; +SELECT E'De\\123dBeEf'::bytea; + -- -- test conversions between various string types -- E021-10 implicit casting among the character data types