mirror of
https://github.com/postgres/postgres.git
synced 2025-05-23 00:02:38 -04:00
Add regression tests for CSV and \., and add automatic quoting of a
single-column dump that has a \. value, so the load works properly. Also add documentation describing this issue.
This commit is contained in:
parent
1b184c990f
commit
87289ff35c
@ -1,5 +1,5 @@
|
|||||||
<!--
|
<!--
|
||||||
$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.70 2005/10/15 20:12:33 neilc Exp $
|
$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.71 2005/12/28 03:25:32 momjian Exp $
|
||||||
PostgreSQL documentation
|
PostgreSQL documentation
|
||||||
-->
|
-->
|
||||||
|
|
||||||
@ -511,17 +511,28 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
|
|||||||
comparisons for specific columns.
|
comparisons for specific columns.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Because backslash is not a special character in the <literal>CSV</>
|
||||||
|
format, <literal>\.</>, the end-of-data marker, could also appear
|
||||||
|
as a data value. To avoid any misinterpretation, a <literal>\.</>
|
||||||
|
data value appearing as a lone entry on a line is automatically
|
||||||
|
quoted on output and, if quoted on input, is not interpreted as the
|
||||||
|
end-of-data marker. If you are loading a single-column table that
|
||||||
|
might have a column value of <literal>\.</>, you might need to quote
|
||||||
|
that value in the input file.
|
||||||
|
</para>
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
<para>
|
<para>
|
||||||
In <literal>CSV</> mode, all characters are significant. A quoted value
|
In <literal>CSV</> mode, all characters are significant. A quoted value
|
||||||
surrounded by white space, or any characters other than
|
surrounded by white space, or any characters other than
|
||||||
<literal>DELIMITER</>, will include those characters. This can cause
|
<literal>DELIMITER</>, will include those characters. This can cause
|
||||||
errors if you import data from a system that pads <literal>CSV</>
|
errors if you import data from a system that pads <literal>CSV</>
|
||||||
lines with white space out to some fixed width. If such a situation
|
lines with white space out to some fixed width. If such a situation
|
||||||
arises you might need to preprocess the <literal>CSV</> file to remove
|
arises you might need to preprocess the <literal>CSV</> file to remove
|
||||||
the trailing white space, before importing the data into
|
the trailing white space, before importing the data into
|
||||||
<productname>PostgreSQL</>.
|
<productname>PostgreSQL</>.
|
||||||
</para>
|
</para>
|
||||||
</note>
|
</note>
|
||||||
|
|
||||||
<note>
|
<note>
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.256 2005/12/27 18:10:48 momjian Exp $
|
* $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.257 2005/12/28 03:25:32 momjian Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -244,7 +244,7 @@ static Datum CopyReadBinaryAttribute(CopyState cstate,
|
|||||||
bool *isnull);
|
bool *isnull);
|
||||||
static void CopyAttributeOutText(CopyState cstate, char *server_string);
|
static void CopyAttributeOutText(CopyState cstate, char *server_string);
|
||||||
static void CopyAttributeOutCSV(CopyState cstate, char *server_string,
|
static void CopyAttributeOutCSV(CopyState cstate, char *server_string,
|
||||||
bool use_quote);
|
bool use_quote, bool single_attr);
|
||||||
static List *CopyGetAttnums(Relation rel, List *attnamelist);
|
static List *CopyGetAttnums(Relation rel, List *attnamelist);
|
||||||
static char *limit_printout_length(const char *str);
|
static char *limit_printout_length(const char *str);
|
||||||
|
|
||||||
@ -1284,7 +1284,8 @@ CopyTo(CopyState cstate)
|
|||||||
|
|
||||||
colname = NameStr(attr[attnum - 1]->attname);
|
colname = NameStr(attr[attnum - 1]->attname);
|
||||||
|
|
||||||
CopyAttributeOutCSV(cstate, colname, false);
|
CopyAttributeOutCSV(cstate, colname, false,
|
||||||
|
list_length(cstate->attnumlist) == 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
CopySendEndOfRow(cstate);
|
CopySendEndOfRow(cstate);
|
||||||
@ -1359,7 +1360,8 @@ CopyTo(CopyState cstate)
|
|||||||
value));
|
value));
|
||||||
if (cstate->csv_mode)
|
if (cstate->csv_mode)
|
||||||
CopyAttributeOutCSV(cstate, string,
|
CopyAttributeOutCSV(cstate, string,
|
||||||
force_quote[attnum - 1]);
|
force_quote[attnum - 1],
|
||||||
|
list_length(cstate->attnumlist) == 1);
|
||||||
else
|
else
|
||||||
CopyAttributeOutText(cstate, string);
|
CopyAttributeOutText(cstate, string);
|
||||||
}
|
}
|
||||||
@ -2968,7 +2970,7 @@ CopyAttributeOutText(CopyState cstate, char *server_string)
|
|||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
CopyAttributeOutCSV(CopyState cstate, char *server_string,
|
CopyAttributeOutCSV(CopyState cstate, char *server_string,
|
||||||
bool use_quote)
|
bool use_quote, bool single_attr)
|
||||||
{
|
{
|
||||||
char *string;
|
char *string;
|
||||||
char c;
|
char c;
|
||||||
@ -2993,17 +2995,27 @@ CopyAttributeOutCSV(CopyState cstate, char *server_string,
|
|||||||
*/
|
*/
|
||||||
if (!use_quote)
|
if (!use_quote)
|
||||||
{
|
{
|
||||||
for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
|
/*
|
||||||
{
|
* Because '\.' can be a data value, quote it if it appears
|
||||||
if (c == delimc || c == quotec || c == '\n' || c == '\r')
|
* alone on a line so it is not interpreted as the end-of-data
|
||||||
|
* marker.
|
||||||
|
*/
|
||||||
|
if (single_attr && strcmp(string, "\\.") == 0)
|
||||||
|
use_quote = true;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
|
||||||
{
|
{
|
||||||
use_quote = true;
|
if (c == delimc || c == quotec || c == '\n' || c == '\r')
|
||||||
break;
|
{
|
||||||
|
use_quote = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
|
||||||
|
mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
|
||||||
|
else
|
||||||
|
mblen = 1;
|
||||||
}
|
}
|
||||||
if (cstate->encoding_embeds_ascii && IS_HIGHBIT_SET(c))
|
|
||||||
mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
|
|
||||||
else
|
|
||||||
mblen = 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -194,6 +194,9 @@ COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\';
|
|||||||
--test that we read consecutive LFs properly
|
--test that we read consecutive LFs properly
|
||||||
CREATE TEMP TABLE testnl (a int, b text, c int);
|
CREATE TEMP TABLE testnl (a int, b text, c int);
|
||||||
COPY testnl FROM stdin CSV;
|
COPY testnl FROM stdin CSV;
|
||||||
|
-- test end of copy marker
|
||||||
|
CREATE TEMP TABLE testeoc (a text);
|
||||||
|
COPY testeoc FROM stdin CSV;
|
||||||
DROP TABLE x, y;
|
DROP TABLE x, y;
|
||||||
DROP FUNCTION fn_x_before();
|
DROP FUNCTION fn_x_before();
|
||||||
DROP FUNCTION fn_x_after();
|
DROP FUNCTION fn_x_after();
|
||||||
|
@ -139,6 +139,16 @@ COPY testnl FROM stdin CSV;
|
|||||||
inside",2
|
inside",2
|
||||||
\.
|
\.
|
||||||
|
|
||||||
|
-- test end of copy marker
|
||||||
|
CREATE TEMP TABLE testeoc (a text);
|
||||||
|
|
||||||
|
COPY testeoc FROM stdin CSV;
|
||||||
|
a\.
|
||||||
|
\.b
|
||||||
|
c\.d
|
||||||
|
"\."
|
||||||
|
\.
|
||||||
|
|
||||||
|
|
||||||
DROP TABLE x, y;
|
DROP TABLE x, y;
|
||||||
DROP FUNCTION fn_x_before();
|
DROP FUNCTION fn_x_before();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user