mirror of
https://github.com/postgres/postgres.git
synced 2025-06-05 00:02:04 -04:00
New file format for COPY BINARY, in accordance with pghackers discussions
of early December 2000. COPY BINARY is now TOAST-safe.
This commit is contained in:
parent
8fd2e269f7
commit
676cf18c5b
@ -1,5 +1,5 @@
|
|||||||
<!--
|
<!--
|
||||||
$Header: /cvsroot/pgsql/doc/src/sgml/ref/copy.sgml,v 1.18 2000/10/05 19:48:17 momjian Exp $
|
$Header: /cvsroot/pgsql/doc/src/sgml/ref/copy.sgml,v 1.19 2001/01/03 20:04:09 tgl Exp $
|
||||||
Postgres documentation
|
Postgres documentation
|
||||||
-->
|
-->
|
||||||
|
|
||||||
@ -49,6 +49,7 @@ COPY [ BINARY ] <replaceable class="parameter">table</replaceable> [ WITH OIDS ]
|
|||||||
<para>
|
<para>
|
||||||
Changes the behavior of field formatting, forcing all data to be
|
Changes the behavior of field formatting, forcing all data to be
|
||||||
stored or read in binary format rather than as text.
|
stored or read in binary format rather than as text.
|
||||||
|
The DELIMITERS and WITH NULL options are irrelevant for binary format.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
@ -66,7 +67,7 @@ COPY [ BINARY ] <replaceable class="parameter">table</replaceable> [ WITH OIDS ]
|
|||||||
<term>WITH OIDS</term>
|
<term>WITH OIDS</term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Copies the internal unique object id (OID) for each row.
|
Specifies copying the internal unique object id (OID) for each row.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
@ -84,7 +85,7 @@ COPY [ BINARY ] <replaceable class="parameter">table</replaceable> [ WITH OIDS ]
|
|||||||
<term><filename>stdin</filename></term>
|
<term><filename>stdin</filename></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Specifies that input comes from a pipe or terminal.
|
Specifies that input comes from the client application.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
@ -93,7 +94,7 @@ COPY [ BINARY ] <replaceable class="parameter">table</replaceable> [ WITH OIDS ]
|
|||||||
<term><filename>stdout</filename></term>
|
<term><filename>stdout</filename></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
Specifies that output goes to a pipe or terminal.
|
Specifies that output goes to the client application.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
@ -102,16 +103,16 @@ COPY [ BINARY ] <replaceable class="parameter">table</replaceable> [ WITH OIDS ]
|
|||||||
<term><replaceable class="parameter">delimiter</replaceable></term>
|
<term><replaceable class="parameter">delimiter</replaceable></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
A character that delimits the input or output fields.
|
The character that separates fields within each row (line) of the file.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><replaceable class="parameter">null print</replaceable></term>
|
<term><replaceable class="parameter">null string</replaceable></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
A string to represent NULL values. The default is
|
The string that represents a NULL value. The default is
|
||||||
<quote><literal>\N</literal></quote> (backslash-N).
|
<quote><literal>\N</literal></quote> (backslash-N).
|
||||||
You might prefer an empty string, for example.
|
You might prefer an empty string, for example.
|
||||||
</para>
|
</para>
|
||||||
@ -166,7 +167,7 @@ ERROR: <replaceable>reason</replaceable>
|
|||||||
|
|
||||||
<refsect1 id="R1-SQL-COPY-1">
|
<refsect1 id="R1-SQL-COPY-1">
|
||||||
<refsect1info>
|
<refsect1info>
|
||||||
<date>1998-09-08</date>
|
<date>2001-01-02</date>
|
||||||
</refsect1info>
|
</refsect1info>
|
||||||
<title>
|
<title>
|
||||||
Description
|
Description
|
||||||
@ -176,17 +177,36 @@ ERROR: <replaceable>reason</replaceable>
|
|||||||
<productname>Postgres</productname> tables and
|
<productname>Postgres</productname> tables and
|
||||||
standard file-system files.
|
standard file-system files.
|
||||||
|
|
||||||
|
<command>COPY TO</command> copies the entire contents of a table to
|
||||||
|
a file, while <command>COPY FROM</command> copies data from a file to a
|
||||||
|
table (appending the data to whatever is in the table already).
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
<command>COPY</command> instructs
|
<command>COPY</command> instructs
|
||||||
the <productname>Postgres</productname> backend
|
the <productname>Postgres</productname> backend
|
||||||
to directly read from or write to a file. The file must be directly visible to
|
to directly read from or write to a file. If a file name is specified,
|
||||||
the backend and the name must be specified from the viewpoint of the backend.
|
the file must be accessible to the backend and the name must be specified
|
||||||
If <filename>stdin</filename> or <filename>stdout</filename> are
|
from the viewpoint of the backend.
|
||||||
|
If <filename>stdin</filename> or <filename>stdout</filename> is
|
||||||
specified, data flows through the client frontend to the backend.
|
specified, data flows through the client frontend to the backend.
|
||||||
|
|
||||||
|
<tip>
|
||||||
|
<para>
|
||||||
|
Do not confuse <command>COPY</command> with the
|
||||||
|
<application>psql</application> instruction <command>\copy</command>.
|
||||||
|
<command>\copy</command> invokes <command>COPY FROM stdin</command>
|
||||||
|
or <command>COPY TO stdout</command>, and then fetches/stores the data
|
||||||
|
in a file accessible to the <application>psql</application> client.
|
||||||
|
Thus, file accessibility and access rights depend on the client
|
||||||
|
rather than the backend when <command>\copy</command> is used.
|
||||||
|
</para>
|
||||||
|
</tip>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<refsect2 id="R2-SQL-COPY-3">
|
<refsect2 id="R2-SQL-COPY-3">
|
||||||
<refsect2info>
|
<refsect2info>
|
||||||
<date>1998-09-08</date>
|
<date>2001-01-02</date>
|
||||||
</refsect2info>
|
</refsect2info>
|
||||||
<title>
|
<title>
|
||||||
Notes
|
Notes
|
||||||
@ -194,16 +214,19 @@ ERROR: <replaceable>reason</replaceable>
|
|||||||
<para>
|
<para>
|
||||||
The BINARY keyword will force all data to be
|
The BINARY keyword will force all data to be
|
||||||
stored/read as binary format rather than as text. It is
|
stored/read as binary format rather than as text. It is
|
||||||
somewhat faster than the normal copy command, but is not
|
somewhat faster than the normal copy command, but a binary copy
|
||||||
generally portable, and the files generated are somewhat larger,
|
file is not portable across machine architectures.
|
||||||
although this factor is highly dependent on the data itself.
|
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
By default, a text copy uses a tab ("\t") character as a delimiter.
|
By default, a text copy uses a tab ("\t") character as a delimiter
|
||||||
The delimiter may also be changed to any other single character
|
between fields. The field delimiter may be changed to any other single
|
||||||
with the keyword phrase USING DELIMITERS. Characters
|
character with the keyword phrase USING DELIMITERS. Characters
|
||||||
in data fields which happen to match the delimiter character will
|
in data fields which happen to match the delimiter character will
|
||||||
be backslash quoted.
|
be backslash quoted.
|
||||||
|
Note that the delimiter is always a single character.
|
||||||
|
If multiple characters are specified in the delimiter string,
|
||||||
|
only the first character is used.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
@ -217,67 +240,63 @@ ERROR: <replaceable>reason</replaceable>
|
|||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
The keyword phrase USING DELIMITERS specifies a single character
|
<command>COPY FROM</command> neither invokes rules nor acts on column
|
||||||
to be used for all delimiters between columns. If multiple characters
|
defaults. It does invoke triggers and check constraints.
|
||||||
are specified in the delimiter string, only the first character is
|
</para>
|
||||||
used.
|
|
||||||
|
<para>
|
||||||
|
<command>COPY</command> stops operation at the first error. This
|
||||||
|
should not lead to problems in the event of
|
||||||
|
a <command>COPY TO</command>, but the
|
||||||
|
target relation will already have received earlier rows in a
|
||||||
|
<command>COPY FROM</command>. These rows will not be visible or
|
||||||
|
accessible, but they still occupy disk space. This may add up to a
|
||||||
|
considerable amount
|
||||||
|
of wasted disk space if the failure happened well into a large copy
|
||||||
|
operation. You may wish to invoke <command>VACUUM</command> to recover
|
||||||
|
the wasted space.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Files named in a <command>COPY</command> command are read or written
|
||||||
|
directly by the backend, not by the client application. Therefore,
|
||||||
|
they must reside on or be accessible to the database server machine,
|
||||||
|
not the client. They must be accessible to and readable or writable
|
||||||
|
by the Postgres user (the userid the backend runs as), not the client.
|
||||||
|
<command>COPY</command> naming a file is only allowed to database
|
||||||
|
superusers, since it allows reading or writing any file that the backend
|
||||||
|
has privileges to access.
|
||||||
|
|
||||||
<tip>
|
<tip>
|
||||||
<para>
|
<para>
|
||||||
Do not confuse <command>COPY</command> with the
|
The
|
||||||
<application>psql</application> instruction <command>\copy</command>.
|
<application>psql</application> instruction <command>\copy</command>
|
||||||
|
reads or writes files on the client machine with the client's
|
||||||
|
permissions, so it is not restricted to superusers.
|
||||||
</para>
|
</para>
|
||||||
</tip>
|
</tip>
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
<command>COPY</command> neither invokes rules nor acts on column defaults.
|
It is recommended that the filename used in <command>COPY</command>
|
||||||
It does invoke triggers, however.
|
always be specified as an absolute path. This is enforced by the backend
|
||||||
</para>
|
in the case of <command>COPY TO</command>, but for <command>COPY
|
||||||
<para>
|
FROM</command> you do have the option of reading from a file specified
|
||||||
<command>COPY</command> stops operation at the first error. This
|
by a relative path. The path will be interpreted relative to the
|
||||||
should not lead to problems in the event of
|
backend's working directory (somewhere below
|
||||||
a <command>COPY FROM</command>, but the
|
<filename>$PGDATA</filename>), not the client's working directory.
|
||||||
target relation will, of course, be partially modified in a
|
|
||||||
<command>COPY TO</command>.
|
|
||||||
<command>VACUUM</command> should be used to clean up
|
|
||||||
after a failed copy.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Because the Postgres backend's current working directory
|
|
||||||
is not usually the same as the user's
|
|
||||||
working directory, the result of copying to a file
|
|
||||||
"<filename>foo</filename>" (without
|
|
||||||
additional path information) may yield unexpected results for the
|
|
||||||
naive user. In this case, <filename>foo</filename>
|
|
||||||
will wind up in <filename>$PGDATA/foo</filename>. In
|
|
||||||
general, the full pathname as it would appear to the backend server machine
|
|
||||||
should be used when specifying files to
|
|
||||||
be copied.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
Files used as arguments to <command>COPY</command>
|
|
||||||
must reside on or be
|
|
||||||
accessible to the database server machine by being either on
|
|
||||||
local disks or on a networked file system.
|
|
||||||
</para>
|
|
||||||
<para>
|
|
||||||
When a TCP/IP connection from one machine to another is used, and a
|
|
||||||
target file is specified, the target file will be written on the
|
|
||||||
machine where the backend is running rather than the user's
|
|
||||||
machine.
|
|
||||||
</para>
|
</para>
|
||||||
</refsect2>
|
</refsect2>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
<refsect1 id="R1-SQL-COPY-2">
|
<refsect1 id="R1-SQL-COPY-2">
|
||||||
<refsect1info>
|
<refsect1info>
|
||||||
<date>1998-05-04</date>
|
<date>2001-01-02</date>
|
||||||
</refsect1info>
|
</refsect1info>
|
||||||
<title>File Formats</title>
|
<title>File Formats</title>
|
||||||
<refsect2>
|
<refsect2>
|
||||||
<refsect2info>
|
<refsect2info>
|
||||||
<date>1998-05-04</date>
|
<date>2001-01-02</date>
|
||||||
</refsect2info>
|
</refsect2info>
|
||||||
<title>Text Format</title>
|
<title>Text Format</title>
|
||||||
<para>
|
<para>
|
||||||
@ -293,27 +312,34 @@ ERROR: <replaceable>reason</replaceable>
|
|||||||
<para>
|
<para>
|
||||||
The actual format for each instance is
|
The actual format for each instance is
|
||||||
<programlisting>
|
<programlisting>
|
||||||
<attr1><<replaceable class=parameter>separator</replaceable>><attr2><<replaceable class=parameter>separator</replaceable>>...<<replaceable class=parameter>separator</replaceable>><attr<replaceable class="parameter">n</replaceable>><newline>.
|
&lt;attr1&gt;&lt;<replaceable class=parameter>separator</replaceable>&gt;&lt;attr2&gt;&lt;<replaceable class=parameter>separator</replaceable>&gt;...&lt;<replaceable class=parameter>separator</replaceable>&gt;&lt;attr<replaceable class="parameter">n</replaceable>&gt;&lt;newline&gt;
|
||||||
</programlisting>
|
</programlisting>
|
||||||
The oid is placed on the beginning of the line
|
Note that the end of each row is marked by a Unix-style newline
|
||||||
if WITH OIDS is specified.
|
("\n"). <command>COPY FROM</command> will not behave as desired
|
||||||
|
if given a file containing DOS- or Mac-style newlines.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
If <command>COPY</command> is sending its output to standard
|
The OID is emitted as the first column if WITH OIDS is specified.
|
||||||
output instead of a file, it will send a backslash("\") and a period
|
</para>
|
||||||
(".") followed immediately by a newline, on a separate line,
|
<para>
|
||||||
when it is done. Similarly, if <command>COPY</command> is reading
|
If <command>COPY TO</command> is sending its output to standard
|
||||||
|
output instead of a file, after the last row it will send a backslash ("\")
|
||||||
|
and a period (".") followed by a newline.
|
||||||
|
Similarly, if <command>COPY FROM</command> is reading
|
||||||
from standard input, it will expect a backslash ("\") and a period
|
from standard input, it will expect a backslash ("\") and a period
|
||||||
(".") followed by a newline, as the first three characters on a
|
(".") followed by a newline, as the first three characters on a
|
||||||
line to denote end-of-file. However, <command>COPY</command>
|
line to denote end-of-file. However, <command>COPY FROM</command>
|
||||||
will terminate (followed by the backend itself) if a true EOF is
|
will terminate correctly (followed by the backend itself) if the
|
||||||
encountered before this special end-of-file pattern is found.
|
input connection is closed before this special end-of-file pattern is
|
||||||
|
found.
|
||||||
</para>
|
</para>
|
||||||
<para>
|
<para>
|
||||||
The backslash character has other special meanings. A literal backslash
|
The backslash character has other special meanings. A literal backslash
|
||||||
character is represented as two
|
character is represented as two
|
||||||
consecutive backslashes ("\\"). A literal tab character is represented
|
consecutive backslashes ("\\"). A literal tab character is represented
|
||||||
as a backslash and a tab. A literal newline character is
|
as a backslash and a tab. (If you are using something other than tab
|
||||||
|
as the column delimiter, backslash that delimiter character to include
|
||||||
|
it in data.) A literal newline character is
|
||||||
represented as a backslash and a newline. When loading text data
|
represented as a backslash and a newline. When loading text data
|
||||||
not generated by <acronym>Postgres</acronym>,
|
not generated by <acronym>Postgres</acronym>,
|
||||||
you will need to convert backslash
|
you will need to convert backslash
|
||||||
@ -324,90 +350,215 @@ ERROR: <replaceable>reason</replaceable>
|
|||||||
|
|
||||||
<refsect2>
|
<refsect2>
|
||||||
<refsect2info>
|
<refsect2info>
|
||||||
<date>1998-05-04</date>
|
<date>2001-01-02</date>
|
||||||
</refsect2info>
|
</refsect2info>
|
||||||
<title>Binary Format</title>
|
<title>Binary Format</title>
|
||||||
<para>
|
<para>
|
||||||
In the case of <command>COPY BINARY</command>, the first four
|
The file format used for <command>COPY BINARY</command> changed in
|
||||||
bytes in the file will be the number of instances in the file. If
|
Postgres v7.1. The new format consists of a file header, zero or more
|
||||||
this number is zero, the <command>COPY BINARY</command> command
|
tuples, and a file trailer.
|
||||||
will read until end-of-file is encountered. Otherwise, it will
|
|
||||||
stop reading when this number of instances has been read.
|
|
||||||
Remaining data in the file will be ignored.
|
|
||||||
</para>
|
</para>
|
||||||
<para>
|
|
||||||
The format for each instance in the file is as follows. Note that
|
|
||||||
this format must be followed <emphasis>exactly</emphasis>.
|
|
||||||
Unsigned four-byte integer quantities are called uint32 in the
|
|
||||||
table below.
|
|
||||||
</para>
|
|
||||||
<table frame="all">
|
|
||||||
<title>Contents of a binary copy file</title>
|
|
||||||
<tgroup cols="2" colsep="1" rowsep="1" align="center">
|
|
||||||
<colspec colname="col1">
|
|
||||||
<colspec colname="col2">
|
|
||||||
<spanspec namest="col1" nameend="col2" spanname="subhead">
|
|
||||||
<tbody>
|
|
||||||
<row>
|
|
||||||
<entry spanname="subhead" align="center">At the start of the file</entry>
|
|
||||||
</row>
|
|
||||||
<row>
|
|
||||||
<entry>uint32</entry>
|
|
||||||
<entry>number of tuples</entry>
|
|
||||||
</row>
|
|
||||||
<row>
|
|
||||||
<entry spanname="subhead" align="center">For each tuple</entry>
|
|
||||||
</row>
|
|
||||||
<row>
|
|
||||||
<entry>uint32</entry>
|
|
||||||
<entry>total length of tuple data</entry>
|
|
||||||
</row>
|
|
||||||
<row>
|
|
||||||
<entry>uint32</entry>
|
|
||||||
<entry>oid (if specified)</entry>
|
|
||||||
</row>
|
|
||||||
<row>
|
|
||||||
<entry>uint32</entry>
|
|
||||||
<entry>number of null attributes</entry>
|
|
||||||
</row>
|
|
||||||
<row>
|
|
||||||
<entry>[uint32,...,uint32]</entry>
|
|
||||||
<entry>attribute numbers of attributes, counting from 0</entry>
|
|
||||||
</row>
|
|
||||||
<row>
|
|
||||||
<entry>-</entry>
|
|
||||||
<entry><tuple data></entry>
|
|
||||||
</row>
|
|
||||||
</tbody>
|
|
||||||
</tgroup>
|
|
||||||
</table>
|
|
||||||
|
|
||||||
</refsect2>
|
<refsect3>
|
||||||
<refsect2>
|
<refsect3info>
|
||||||
<refsect2info>
|
<date>2001-01-02</date>
|
||||||
<date>1998-05-04</date>
|
</refsect3info>
|
||||||
</refsect2info>
|
<title>
|
||||||
<title>Alignment of Binary Data</title>
|
File Header
|
||||||
|
</title>
|
||||||
<para>
|
<para>
|
||||||
On Sun-3s, 2-byte attributes are aligned on two-byte boundaries,
|
The file header consists of 24 bytes of fixed fields, followed
|
||||||
and all larger attributes are aligned on four-byte boundaries.
|
by a variable-length header extension area. The fixed fields are:
|
||||||
Character attributes are aligned on single-byte boundaries. On
|
|
||||||
most other machines, all attributes larger than 1 byte are aligned on
|
<variablelist>
|
||||||
four-byte boundaries. Note that variable length attributes are
|
<varlistentry>
|
||||||
preceded by the attribute's length; arrays are simply contiguous
|
<term>Signature</term>
|
||||||
streams of the array element type.
|
<listitem>
|
||||||
|
<para>
|
||||||
|
12-byte sequence "PGBCOPY\n\377\r\n\0" --- note that the null
|
||||||
|
is a required part of the signature. (The signature is designed to allow
|
||||||
|
easy identification of files that have been munged by a non-8-bit-clean
|
||||||
|
transfer. This signature will be changed by newline-translation
|
||||||
|
filters, dropped nulls, dropped high bits, or parity changes.)
|
||||||
</para>
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term>Integer layout field</term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
int32 constant 0x01020304 in source's byte order.
|
||||||
|
Potentially, a reader could engage in byte-flipping of subsequent fields
|
||||||
|
if the wrong byte order is detected here.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term>Flags field</term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
int32 bit mask to denote important aspects of the file
|
||||||
|
format. Bits are numbered from 0 (LSB) to 31 (MSB) --- note that this
|
||||||
|
field is stored with source's endianness, as are all subsequent integer
|
||||||
|
fields. Bits 16-31 are reserved to denote critical file format issues;
|
||||||
|
a reader should abort if it finds an unexpected bit set in this range.
|
||||||
|
Bits 0-15 are reserved to signal backwards-compatible format issues;
|
||||||
|
a reader should simply ignore any unexpected bits set in this range.
|
||||||
|
Currently only one flag bit is defined, and the rest must be zero:
|
||||||
|
<variablelist>
|
||||||
|
<varlistentry>
|
||||||
|
<term>Bit 16</term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
if 1, OIDs are included in the dump; if 0, not
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term>Header extension area length</term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
int32 length in bytes of remainder of header, not including self. In
|
||||||
|
the initial version this will be zero, and the first tuple follows
|
||||||
|
immediately. Future changes to the format might allow additional data
|
||||||
|
to be present in the header. A reader should silently skip over any header
|
||||||
|
extension data it does not know what to do with.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
The header extension area is envisioned to contain a sequence of
|
||||||
|
self-identifying chunks. The flags field is not intended to tell readers
|
||||||
|
what is in the extension area. Specific design of header extension contents
|
||||||
|
is left for a later release.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
This design allows for both backwards-compatible header additions (add
|
||||||
|
header extension chunks, or set low-order flag bits) and
|
||||||
|
non-backwards-compatible changes (set high-order flag bits to signal such
|
||||||
|
changes, and add supporting data to the extension area if needed).
|
||||||
|
</para>
|
||||||
|
</refsect3>
|
||||||
|
|
||||||
|
<refsect3>
|
||||||
|
<refsect3info>
|
||||||
|
<date>2001-01-02</date>
|
||||||
|
</refsect3info>
|
||||||
|
<title>
|
||||||
|
Tuples
|
||||||
|
</title>
|
||||||
|
<para>
|
||||||
|
Each tuple begins with an int16 count of the number of fields in the
|
||||||
|
tuple. (Presently, all tuples in a table will have the same count, but
|
||||||
|
that might not always be true.) Then, repeated for each field in the
|
||||||
|
tuple, there is an int16 typlen word possibly followed by field data.
|
||||||
|
The typlen field is interpreted thus:
|
||||||
|
|
||||||
|
<variablelist>
|
||||||
|
<varlistentry>
|
||||||
|
<term>Zero</term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Field is NULL. No data follows.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term>&gt; 0</term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Field is a fixed-length datatype. Exactly N (the typlen value)
|
||||||
|
bytes of data follow the typlen word.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term>-1</term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Field is a varlena datatype. The next four
|
||||||
|
bytes are the varlena header, which contains
|
||||||
|
the total value length including itself.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term>&lt; -1</term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Reserved for future use.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
For non-NULL fields, the reader can check that the typlen matches the
|
||||||
|
expected typlen for the destination column. This provides a simple
|
||||||
|
but very useful check that the data is as expected.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
There is no alignment padding or any other extra data between fields.
|
||||||
|
Note also that the format does not distinguish whether a datatype is
|
||||||
|
pass-by-reference or pass-by-value. Both of these provisions are
|
||||||
|
deliberate: they might help improve portability of the files (although
|
||||||
|
of course endianness and floating-point-format issues can still keep
|
||||||
|
you from moving a binary file across machines).
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
If OIDs are included in the dump, the OID field immediately follows the
|
||||||
|
field-count word. It is a normal field except that it's not included
|
||||||
|
in the field-count. In particular it has a typlen --- this will allow
|
||||||
|
handling of 4-byte vs 8-byte OIDs without too much pain, and will allow
|
||||||
|
OIDs to be shown as NULL if we someday allow OIDs to be optional.
|
||||||
|
</para>
|
||||||
|
</refsect3>
|
||||||
|
|
||||||
|
<refsect3>
|
||||||
|
<refsect3info>
|
||||||
|
<date>2001-01-02</date>
|
||||||
|
</refsect3info>
|
||||||
|
<title>
|
||||||
|
File Trailer
|
||||||
|
</title>
|
||||||
|
<para>
|
||||||
|
The file trailer consists of an int16 word containing -1. This is
|
||||||
|
easily distinguished from a tuple's field-count word.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
A reader should report an error if a field-count word is neither -1
|
||||||
|
nor the expected number of columns. This provides an extra
|
||||||
|
check against somehow getting out of sync with the data.
|
||||||
|
</para>
|
||||||
|
</refsect3>
|
||||||
</refsect2>
|
</refsect2>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
|
||||||
<refsect1 id="R1-SQL-COPY-3">
|
<refsect1 id="R1-SQL-COPY-3">
|
||||||
<title>
|
<title>
|
||||||
Usage
|
Usage
|
||||||
</title>
|
</title>
|
||||||
<para>
|
<para>
|
||||||
The following example copies a table to standard output,
|
The following example copies a table to standard output,
|
||||||
using a pipe (|) as the field
|
using a vertical bar (|) as the field
|
||||||
delimiter:
|
delimiter:
|
||||||
</para>
|
</para>
|
||||||
<programlisting>
|
<programlisting>
|
||||||
@ -425,36 +576,36 @@ COPY country FROM '/usr1/proj/bray/sql/country_data';
|
|||||||
has the termination sequence on the last line):
|
has the termination sequence on the last line):
|
||||||
</para>
|
</para>
|
||||||
<programlisting>
|
<programlisting>
|
||||||
AF AFGHANISTAN
|
AF AFGHANISTAN
|
||||||
AL ALBANIA
|
AL ALBANIA
|
||||||
DZ ALGERIA
|
DZ ALGERIA
|
||||||
...
|
ZM ZAMBIA
|
||||||
ZM ZAMBIA
|
ZW ZIMBABWE
|
||||||
ZW ZIMBABWE
|
\.
|
||||||
\.
|
|
||||||
</programlisting>
|
</programlisting>
|
||||||
<para>
|
<para>
|
||||||
The following is the same data, output in binary format on a Linux/i586 machine.
|
Note that the white space on each line is actually a TAB.
|
||||||
The data is shown after filtering through
|
</para>
|
||||||
|
<para>
|
||||||
|
The following is the same data, output in binary format on a Linux/i586
|
||||||
|
machine. The data is shown after filtering through
|
||||||
the Unix utility <command>od -c</command>. The table has
|
the Unix utility <command>od -c</command>. The table has
|
||||||
three fields; the first is <classname>char(2)</classname>
|
three fields; the first is <classname>char(2)</classname>,
|
||||||
and the second is <classname>text</classname>. All the
|
the second is <classname>text</classname>, and the third is
|
||||||
|
<classname>int4</classname>. All the
|
||||||
rows have a null value in the third field.
|
rows have a null value in the third field.
|
||||||
Notice how the <classname>char(2)</classname>
|
|
||||||
field is padded with nulls to four bytes and the text field is
|
|
||||||
preceded by its length:
|
|
||||||
</para>
|
</para>
|
||||||
<programlisting>
|
<programlisting>
|
||||||
355 \0 \0 \0 027 \0 \0 \0 001 \0 \0 \0 002 \0 \0 \0
|
0000000 P G B C O P Y \n 377 \r \n \0 004 003 002 001
|
||||||
006 \0 \0 \0 A F \0 \0 017 \0 \0 \0 A F G H
|
0000020 \0 \0 \0 \0 \0 \0 \0 \0 003 \0 377 377 006 \0 \0 \0
|
||||||
A N I S T A N 023 \0 \0 \0 001 \0 \0 \0 002
|
0000040 A F 377 377 017 \0 \0 \0 A F G H A N I S
|
||||||
\0 \0 \0 006 \0 \0 \0 A L \0 \0 \v \0 \0 \0 A
|
0000060 T A N \0 \0 003 \0 377 377 006 \0 \0 \0 A L 377
|
||||||
L B A N I A 023 \0 \0 \0 001 \0 \0 \0 002 \0
|
0000100 377 \v \0 \0 \0 A L B A N I A \0 \0 003 \0
|
||||||
\0 \0 006 \0 \0 \0 D Z \0 \0 \v \0 \0 \0 A L
|
0000120 377 377 006 \0 \0 \0 D Z 377 377 \v \0 \0 \0 A L
|
||||||
G E R I A
|
0000140 G E R I A \0 \0 003 \0 377 377 006 \0 \0 \0 Z
|
||||||
... \n \0 \0 \0 Z A M B I A 024 \0
|
0000160 M 377 377 \n \0 \0 \0 Z A M B I A \0 \0 003
|
||||||
\0 \0 001 \0 \0 \0 002 \0 \0 \0 006 \0 \0 \0 Z W
|
0000200 \0 377 377 006 \0 \0 \0 Z W 377 377 \f \0 \0 \0 Z
|
||||||
\0 \0 \f \0 \0 \0 Z I M B A B W E
|
0000220 I M B A B W E \0 \0 377 377
|
||||||
</programlisting>
|
</programlisting>
|
||||||
</refsect1>
|
</refsect1>
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/commands/copy.c,v 1.126 2000/12/27 23:59:14 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/commands/copy.c,v 1.127 2001/01/03 20:04:10 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -52,7 +52,8 @@ static Oid GetTypeElement(Oid type);
|
|||||||
static void CopyReadNewline(FILE *fp, int *newline);
|
static void CopyReadNewline(FILE *fp, int *newline);
|
||||||
static char *CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline, char *null_print);
|
static char *CopyReadAttribute(FILE *fp, bool *isnull, char *delim, int *newline, char *null_print);
|
||||||
static void CopyAttributeOut(FILE *fp, char *string, char *delim);
|
static void CopyAttributeOut(FILE *fp, char *string, char *delim);
|
||||||
static int CountTuples(Relation relation);
|
|
||||||
|
static const char BinarySignature[12] = "PGBCOPY\n\377\r\n\0";
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Static communication variables ... pretty grotty, but COPY has
|
* Static communication variables ... pretty grotty, but COPY has
|
||||||
@ -387,7 +388,8 @@ DoCopy(char *relname, bool binary, bool oids, bool from, bool pipe,
|
|||||||
* Copy from relation TO file.
|
* Copy from relation TO file.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_print)
|
CopyTo(Relation rel, bool binary, bool oids, FILE *fp,
|
||||||
|
char *delim, char *null_print)
|
||||||
{
|
{
|
||||||
HeapTuple tuple;
|
HeapTuple tuple;
|
||||||
TupleDesc tupDesc;
|
TupleDesc tupDesc;
|
||||||
@ -398,20 +400,9 @@ CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_p
|
|||||||
FmgrInfo *out_functions;
|
FmgrInfo *out_functions;
|
||||||
Oid *elements;
|
Oid *elements;
|
||||||
bool *isvarlena;
|
bool *isvarlena;
|
||||||
int32 *typmod;
|
int16 fld_size;
|
||||||
char *nulls;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* <nulls> is a (dynamically allocated) array with one character per
|
|
||||||
* attribute in the instance being copied. nulls[I-1] is 'n' if
|
|
||||||
* Attribute Number I is null, and ' ' otherwise.
|
|
||||||
*
|
|
||||||
* <nulls> is meaningful only if we are doing a binary copy.
|
|
||||||
*/
|
|
||||||
char *string;
|
char *string;
|
||||||
|
|
||||||
scandesc = heap_beginscan(rel, 0, QuerySnapshot, 0, NULL);
|
|
||||||
|
|
||||||
tupDesc = rel->rd_att;
|
tupDesc = rel->rd_att;
|
||||||
attr_count = rel->rd_att->natts;
|
attr_count = rel->rd_att->natts;
|
||||||
attr = rel->rd_att->attrs;
|
attr = rel->rd_att->attrs;
|
||||||
@ -420,7 +411,6 @@ CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_p
|
|||||||
out_functions = (FmgrInfo *) palloc(attr_count * sizeof(FmgrInfo));
|
out_functions = (FmgrInfo *) palloc(attr_count * sizeof(FmgrInfo));
|
||||||
elements = (Oid *) palloc(attr_count * sizeof(Oid));
|
elements = (Oid *) palloc(attr_count * sizeof(Oid));
|
||||||
isvarlena = (bool *) palloc(attr_count * sizeof(bool));
|
isvarlena = (bool *) palloc(attr_count * sizeof(bool));
|
||||||
typmod = (int32 *) palloc(attr_count * sizeof(int32));
|
|
||||||
for (i = 0; i < attr_count; i++)
|
for (i = 0; i < attr_count; i++)
|
||||||
{
|
{
|
||||||
Oid out_func_oid;
|
Oid out_func_oid;
|
||||||
@ -430,40 +420,62 @@ CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_p
|
|||||||
elog(ERROR, "COPY: couldn't lookup info for type %u",
|
elog(ERROR, "COPY: couldn't lookup info for type %u",
|
||||||
attr[i]->atttypid);
|
attr[i]->atttypid);
|
||||||
fmgr_info(out_func_oid, &out_functions[i]);
|
fmgr_info(out_func_oid, &out_functions[i]);
|
||||||
typmod[i] = attr[i]->atttypmod;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!binary)
|
if (binary)
|
||||||
{
|
{
|
||||||
nulls = NULL; /* meaningless, but compiler doesn't know
|
/* Generate header for a binary copy */
|
||||||
* that */
|
int32 tmp;
|
||||||
|
|
||||||
|
/* Signature */
|
||||||
|
CopySendData((char *) BinarySignature, 12, fp);
|
||||||
|
/* Integer layout field */
|
||||||
|
tmp = 0x01020304;
|
||||||
|
CopySendData(&tmp, sizeof(int32), fp);
|
||||||
|
/* Flags field */
|
||||||
|
tmp = 0;
|
||||||
|
if (oids)
|
||||||
|
tmp |= (1 << 16);
|
||||||
|
CopySendData(&tmp, sizeof(int32), fp);
|
||||||
|
/* No header extension */
|
||||||
|
tmp = 0;
|
||||||
|
CopySendData(&tmp, sizeof(int32), fp);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
int32 ntuples;
|
|
||||||
|
|
||||||
nulls = (char *) palloc(attr_count);
|
scandesc = heap_beginscan(rel, 0, QuerySnapshot, 0, NULL);
|
||||||
for (i = 0; i < attr_count; i++)
|
|
||||||
nulls[i] = ' ';
|
|
||||||
|
|
||||||
/* XXX expensive */
|
|
||||||
|
|
||||||
ntuples = CountTuples(rel);
|
|
||||||
CopySendData(&ntuples, sizeof(int32), fp);
|
|
||||||
}
|
|
||||||
|
|
||||||
while (HeapTupleIsValid(tuple = heap_getnext(scandesc, 0)))
|
while (HeapTupleIsValid(tuple = heap_getnext(scandesc, 0)))
|
||||||
{
|
{
|
||||||
|
bool need_delim = false;
|
||||||
|
|
||||||
if (QueryCancel)
|
if (QueryCancel)
|
||||||
CancelQuery();
|
CancelQuery();
|
||||||
|
|
||||||
if (oids && !binary)
|
if (binary)
|
||||||
|
{
|
||||||
|
/* Binary per-tuple header */
|
||||||
|
int16 fld_count = attr_count;
|
||||||
|
|
||||||
|
CopySendData(&fld_count, sizeof(int16), fp);
|
||||||
|
/* Send OID if wanted --- note fld_count doesn't include it */
|
||||||
|
if (oids)
|
||||||
|
{
|
||||||
|
fld_size = sizeof(Oid);
|
||||||
|
CopySendData(&fld_size, sizeof(int16), fp);
|
||||||
|
CopySendData(&tuple->t_data->t_oid, sizeof(Oid), fp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Text format has no per-tuple header, but send OID if wanted */
|
||||||
|
if (oids)
|
||||||
{
|
{
|
||||||
string = DatumGetCString(DirectFunctionCall1(oidout,
|
string = DatumGetCString(DirectFunctionCall1(oidout,
|
||||||
ObjectIdGetDatum(tuple->t_data->t_oid)));
|
ObjectIdGetDatum(tuple->t_data->t_oid)));
|
||||||
CopySendString(string, fp);
|
CopySendString(string, fp);
|
||||||
CopySendChar(delim[0], fp);
|
|
||||||
pfree(string);
|
pfree(string);
|
||||||
|
need_delim = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < attr_count; i++)
|
for (i = 0; i < attr_count; i++)
|
||||||
@ -474,18 +486,31 @@ CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_p
|
|||||||
|
|
||||||
origvalue = heap_getattr(tuple, i + 1, tupDesc, &isnull);
|
origvalue = heap_getattr(tuple, i + 1, tupDesc, &isnull);
|
||||||
|
|
||||||
|
if (!binary)
|
||||||
|
{
|
||||||
|
if (need_delim)
|
||||||
|
CopySendChar(delim[0], fp);
|
||||||
|
need_delim = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (isnull)
|
if (isnull)
|
||||||
{
|
{
|
||||||
if (!binary)
|
if (!binary)
|
||||||
|
{
|
||||||
CopySendString(null_print, fp); /* null indicator */
|
CopySendString(null_print, fp); /* null indicator */
|
||||||
|
}
|
||||||
else
|
else
|
||||||
nulls[i] = 'n';
|
{
|
||||||
|
fld_size = 0; /* null marker */
|
||||||
|
CopySendData(&fld_size, sizeof(int16), fp);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* If we have a toasted datum, forcibly detoast it to avoid
|
* If we have a toasted datum, forcibly detoast it to avoid
|
||||||
* memory leakage inside the type's output routine.
|
* memory leakage inside the type's output routine (or
|
||||||
|
* for binary case, because we must output untoasted value).
|
||||||
*/
|
*/
|
||||||
if (isvarlena[i])
|
if (isvarlena[i])
|
||||||
value = PointerGetDatum(PG_DETOAST_DATUM(origvalue));
|
value = PointerGetDatum(PG_DETOAST_DATUM(origvalue));
|
||||||
@ -497,73 +522,69 @@ CopyTo(Relation rel, bool binary, bool oids, FILE *fp, char *delim, char *null_p
|
|||||||
string = DatumGetCString(FunctionCall3(&out_functions[i],
|
string = DatumGetCString(FunctionCall3(&out_functions[i],
|
||||||
value,
|
value,
|
||||||
ObjectIdGetDatum(elements[i]),
|
ObjectIdGetDatum(elements[i]),
|
||||||
Int32GetDatum(typmod[i])));
|
Int32GetDatum(attr[i]->atttypmod)));
|
||||||
CopyAttributeOut(fp, string, delim);
|
CopyAttributeOut(fp, string, delim);
|
||||||
pfree(string);
|
pfree(string);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fld_size = attr[i]->attlen;
|
||||||
|
CopySendData(&fld_size, sizeof(int16), fp);
|
||||||
|
if (isvarlena[i])
|
||||||
|
{
|
||||||
|
/* varlena */
|
||||||
|
Assert(fld_size == -1);
|
||||||
|
CopySendData(DatumGetPointer(value),
|
||||||
|
VARSIZE(value),
|
||||||
|
fp);
|
||||||
|
}
|
||||||
|
else if (!attr[i]->attbyval)
|
||||||
|
{
|
||||||
|
/* fixed-length pass-by-reference */
|
||||||
|
Assert(fld_size > 0);
|
||||||
|
CopySendData(DatumGetPointer(value),
|
||||||
|
fld_size,
|
||||||
|
fp);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* pass-by-value */
|
||||||
|
Datum datumBuf;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need this horsing around because we don't know
|
||||||
|
* how shorter data values are aligned within a Datum.
|
||||||
|
*/
|
||||||
|
store_att_byval(&datumBuf, value, fld_size);
|
||||||
|
CopySendData(&datumBuf,
|
||||||
|
fld_size,
|
||||||
|
fp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Clean up detoasted copy, if any */
|
/* Clean up detoasted copy, if any */
|
||||||
if (value != origvalue)
|
if (value != origvalue)
|
||||||
pfree(DatumGetPointer(value));
|
pfree(DatumGetPointer(value));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!binary)
|
if (!binary)
|
||||||
{
|
|
||||||
if (i == attr_count - 1)
|
|
||||||
CopySendChar('\n', fp);
|
CopySendChar('\n', fp);
|
||||||
else
|
|
||||||
{
|
|
||||||
|
|
||||||
/*
|
|
||||||
* when copying out, only use the first char of the
|
|
||||||
* delim string
|
|
||||||
*/
|
|
||||||
CopySendChar(delim[0], fp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (binary)
|
|
||||||
{
|
|
||||||
int32 null_ct = 0,
|
|
||||||
length;
|
|
||||||
|
|
||||||
for (i = 0; i < attr_count; i++)
|
|
||||||
{
|
|
||||||
if (nulls[i] == 'n')
|
|
||||||
null_ct++;
|
|
||||||
}
|
|
||||||
|
|
||||||
length = tuple->t_len - tuple->t_data->t_hoff;
|
|
||||||
CopySendData(&length, sizeof(int32), fp);
|
|
||||||
if (oids)
|
|
||||||
CopySendData((char *) &tuple->t_data->t_oid, sizeof(int32), fp);
|
|
||||||
|
|
||||||
CopySendData(&null_ct, sizeof(int32), fp);
|
|
||||||
if (null_ct > 0)
|
|
||||||
{
|
|
||||||
for (i = 0; i < attr_count; i++)
|
|
||||||
{
|
|
||||||
if (nulls[i] == 'n')
|
|
||||||
{
|
|
||||||
CopySendData(&i, sizeof(int32), fp);
|
|
||||||
nulls[i] = ' ';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
CopySendData((char *) tuple->t_data + tuple->t_data->t_hoff,
|
|
||||||
length, fp);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
heap_endscan(scandesc);
|
heap_endscan(scandesc);
|
||||||
|
|
||||||
|
if (binary)
|
||||||
|
{
|
||||||
|
/* Generate trailer for a binary copy */
|
||||||
|
int16 fld_count = -1;
|
||||||
|
|
||||||
|
CopySendData(&fld_count, sizeof(int16), fp);
|
||||||
|
}
|
||||||
|
|
||||||
pfree(out_functions);
|
pfree(out_functions);
|
||||||
pfree(elements);
|
pfree(elements);
|
||||||
pfree(isvarlena);
|
pfree(isvarlena);
|
||||||
pfree(typmod);
|
|
||||||
if (binary)
|
|
||||||
pfree(nulls);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -580,27 +601,20 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp,
|
|||||||
AttrNumber attr_count;
|
AttrNumber attr_count;
|
||||||
FmgrInfo *in_functions;
|
FmgrInfo *in_functions;
|
||||||
Oid *elements;
|
Oid *elements;
|
||||||
int32 *typmod;
|
|
||||||
int i;
|
int i;
|
||||||
Oid in_func_oid;
|
Oid in_func_oid;
|
||||||
Datum *values;
|
Datum *values;
|
||||||
char *nulls;
|
char *nulls;
|
||||||
bool isnull;
|
bool isnull;
|
||||||
int done = 0;
|
int done = 0;
|
||||||
char *string = NULL,
|
char *string;
|
||||||
*ptr;
|
|
||||||
int32 len,
|
|
||||||
null_ct,
|
|
||||||
null_id;
|
|
||||||
int32 ntuples,
|
|
||||||
tuples_read = 0;
|
|
||||||
bool reading_to_eof = true;
|
|
||||||
ResultRelInfo *resultRelInfo;
|
ResultRelInfo *resultRelInfo;
|
||||||
EState *estate = CreateExecutorState(); /* for ExecConstraints() */
|
EState *estate = CreateExecutorState(); /* for ExecConstraints() */
|
||||||
TupleTable tupleTable;
|
TupleTable tupleTable;
|
||||||
TupleTableSlot *slot;
|
TupleTableSlot *slot;
|
||||||
Oid loaded_oid = InvalidOid;
|
Oid loaded_oid = InvalidOid;
|
||||||
bool skip_tuple = false;
|
bool skip_tuple = false;
|
||||||
|
bool file_has_oids;
|
||||||
|
|
||||||
tupDesc = RelationGetDescr(rel);
|
tupDesc = RelationGetDescr(rel);
|
||||||
attr = tupDesc->attrs;
|
attr = tupDesc->attrs;
|
||||||
@ -630,31 +644,58 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp,
|
|||||||
{
|
{
|
||||||
in_functions = (FmgrInfo *) palloc(attr_count * sizeof(FmgrInfo));
|
in_functions = (FmgrInfo *) palloc(attr_count * sizeof(FmgrInfo));
|
||||||
elements = (Oid *) palloc(attr_count * sizeof(Oid));
|
elements = (Oid *) palloc(attr_count * sizeof(Oid));
|
||||||
typmod = (int32 *) palloc(attr_count * sizeof(int32));
|
|
||||||
for (i = 0; i < attr_count; i++)
|
for (i = 0; i < attr_count; i++)
|
||||||
{
|
{
|
||||||
in_func_oid = (Oid) GetInputFunction(attr[i]->atttypid);
|
in_func_oid = (Oid) GetInputFunction(attr[i]->atttypid);
|
||||||
fmgr_info(in_func_oid, &in_functions[i]);
|
fmgr_info(in_func_oid, &in_functions[i]);
|
||||||
elements[i] = GetTypeElement(attr[i]->atttypid);
|
elements[i] = GetTypeElement(attr[i]->atttypid);
|
||||||
typmod[i] = attr[i]->atttypmod;
|
|
||||||
}
|
}
|
||||||
|
file_has_oids = oids; /* must rely on user to tell us this... */
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
/* Read and verify binary header */
|
||||||
|
char readSig[12];
|
||||||
|
int32 tmp;
|
||||||
|
|
||||||
|
/* Signature */
|
||||||
|
CopyGetData(readSig, 12, fp);
|
||||||
|
if (CopyGetEof(fp) ||
|
||||||
|
memcmp(readSig, BinarySignature, 12) != 0)
|
||||||
|
elog(ERROR, "COPY BINARY: file signature not recognized");
|
||||||
|
/* Integer layout field */
|
||||||
|
CopyGetData(&tmp, sizeof(int32), fp);
|
||||||
|
if (CopyGetEof(fp) ||
|
||||||
|
tmp != 0x01020304)
|
||||||
|
elog(ERROR, "COPY BINARY: incompatible integer layout");
|
||||||
|
/* Flags field */
|
||||||
|
CopyGetData(&tmp, sizeof(int32), fp);
|
||||||
|
if (CopyGetEof(fp))
|
||||||
|
elog(ERROR, "COPY BINARY: bogus file header (missing flags)");
|
||||||
|
file_has_oids = (tmp & (1 << 16)) != 0;
|
||||||
|
tmp &= ~ (1 << 16);
|
||||||
|
if ((tmp >> 16) != 0)
|
||||||
|
elog(ERROR, "COPY BINARY: unrecognized critical flags in header");
|
||||||
|
/* Header extension length */
|
||||||
|
CopyGetData(&tmp, sizeof(int32), fp);
|
||||||
|
if (CopyGetEof(fp) ||
|
||||||
|
tmp < 0)
|
||||||
|
elog(ERROR, "COPY BINARY: bogus file header (missing length)");
|
||||||
|
/* Skip extension header, if present */
|
||||||
|
while (tmp-- > 0)
|
||||||
|
{
|
||||||
|
CopyGetData(readSig, 1, fp);
|
||||||
|
if (CopyGetEof(fp))
|
||||||
|
elog(ERROR, "COPY BINARY: bogus file header (wrong length)");
|
||||||
|
}
|
||||||
|
|
||||||
in_functions = NULL;
|
in_functions = NULL;
|
||||||
elements = NULL;
|
elements = NULL;
|
||||||
typmod = NULL;
|
|
||||||
CopyGetData(&ntuples, sizeof(int32), fp);
|
|
||||||
if (ntuples != 0)
|
|
||||||
reading_to_eof = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
values = (Datum *) palloc(attr_count * sizeof(Datum));
|
values = (Datum *) palloc(attr_count * sizeof(Datum));
|
||||||
nulls = (char *) palloc(attr_count * sizeof(char));
|
nulls = (char *) palloc(attr_count * sizeof(char));
|
||||||
|
|
||||||
for (i = 0; i < attr_count; i++)
|
|
||||||
nulls[i] = ' ';
|
|
||||||
|
|
||||||
lineno = 0;
|
lineno = 0;
|
||||||
fe_eof = false;
|
fe_eof = false;
|
||||||
|
|
||||||
@ -668,15 +709,22 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp,
|
|||||||
|
|
||||||
lineno++;
|
lineno++;
|
||||||
|
|
||||||
|
/* Initialize all values for row to NULL */
|
||||||
|
MemSet(values, 0, attr_count * sizeof(Datum));
|
||||||
|
MemSet(nulls, 'n', attr_count * sizeof(char));
|
||||||
|
|
||||||
if (!binary)
|
if (!binary)
|
||||||
{
|
{
|
||||||
int newline = 0;
|
int newline = 0;
|
||||||
|
|
||||||
if (oids)
|
if (file_has_oids)
|
||||||
{
|
{
|
||||||
string = CopyReadAttribute(fp, &isnull, delim, &newline, null_print);
|
string = CopyReadAttribute(fp, &isnull, delim,
|
||||||
if (string == NULL)
|
&newline, null_print);
|
||||||
done = 1;
|
if (isnull)
|
||||||
|
elog(ERROR, "COPY TEXT: NULL Oid");
|
||||||
|
else if (string == NULL)
|
||||||
|
done = 1; /* end of file */
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
loaded_oid = DatumGetObjectId(DirectFunctionCall1(oidin,
|
loaded_oid = DatumGetObjectId(DirectFunctionCall1(oidin,
|
||||||
@ -685,22 +733,24 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp,
|
|||||||
elog(ERROR, "COPY TEXT: Invalid Oid");
|
elog(ERROR, "COPY TEXT: Invalid Oid");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < attr_count && !done; i++)
|
for (i = 0; i < attr_count && !done; i++)
|
||||||
{
|
{
|
||||||
string = CopyReadAttribute(fp, &isnull, delim, &newline, null_print);
|
string = CopyReadAttribute(fp, &isnull, delim,
|
||||||
|
&newline, null_print);
|
||||||
if (isnull)
|
if (isnull)
|
||||||
{
|
{
|
||||||
values[i] = PointerGetDatum(NULL);
|
/* already set values[i] and nulls[i] */
|
||||||
nulls[i] = 'n';
|
|
||||||
}
|
}
|
||||||
else if (string == NULL)
|
else if (string == NULL)
|
||||||
done = 1;
|
done = 1; /* end of file */
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
values[i] = FunctionCall3(&in_functions[i],
|
values[i] = FunctionCall3(&in_functions[i],
|
||||||
CStringGetDatum(string),
|
CStringGetDatum(string),
|
||||||
ObjectIdGetDatum(elements[i]),
|
ObjectIdGetDatum(elements[i]),
|
||||||
Int32GetDatum(typmod[i]));
|
Int32GetDatum(attr[i]->atttypmod));
|
||||||
|
nulls[i] = ' ';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!done)
|
if (!done)
|
||||||
@ -708,47 +758,103 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{ /* binary */
|
{ /* binary */
|
||||||
CopyGetData(&len, sizeof(int32), fp);
|
int16 fld_count,
|
||||||
if (CopyGetEof(fp))
|
fld_size;
|
||||||
|
|
||||||
|
CopyGetData(&fld_count, sizeof(int16), fp);
|
||||||
|
if (CopyGetEof(fp) ||
|
||||||
|
fld_count == -1)
|
||||||
done = 1;
|
done = 1;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (oids)
|
if (fld_count <= 0 || fld_count > attr_count)
|
||||||
|
elog(ERROR, "COPY BINARY: tuple field count is %d, expected %d",
|
||||||
|
(int) fld_count, attr_count);
|
||||||
|
|
||||||
|
if (file_has_oids)
|
||||||
{
|
{
|
||||||
CopyGetData(&loaded_oid, sizeof(int32), fp);
|
CopyGetData(&fld_size, sizeof(int16), fp);
|
||||||
|
if (CopyGetEof(fp))
|
||||||
|
elog(ERROR, "COPY BINARY: unexpected EOF");
|
||||||
|
if (fld_size != (int16) sizeof(Oid))
|
||||||
|
elog(ERROR, "COPY BINARY: sizeof(Oid) is %d, expected %d",
|
||||||
|
(int) fld_size, (int) sizeof(Oid));
|
||||||
|
CopyGetData(&loaded_oid, sizeof(Oid), fp);
|
||||||
|
if (CopyGetEof(fp))
|
||||||
|
elog(ERROR, "COPY BINARY: unexpected EOF");
|
||||||
if (loaded_oid == InvalidOid)
|
if (loaded_oid == InvalidOid)
|
||||||
elog(ERROR, "COPY BINARY: Invalid Oid");
|
elog(ERROR, "COPY BINARY: Invalid Oid");
|
||||||
}
|
}
|
||||||
CopyGetData(&null_ct, sizeof(int32), fp);
|
|
||||||
if (null_ct > 0)
|
for (i = 0; i < (int) fld_count; i++)
|
||||||
{
|
{
|
||||||
for (i = 0; i < null_ct; i++)
|
CopyGetData(&fld_size, sizeof(int16), fp);
|
||||||
|
if (CopyGetEof(fp))
|
||||||
|
elog(ERROR, "COPY BINARY: unexpected EOF");
|
||||||
|
if (fld_size == 0)
|
||||||
|
continue; /* it's NULL; nulls[i] already set */
|
||||||
|
if (fld_size != attr[i]->attlen)
|
||||||
|
elog(ERROR, "COPY BINARY: sizeof(field %d) is %d, expected %d",
|
||||||
|
i+1, (int) fld_size, (int) attr[i]->attlen);
|
||||||
|
if (fld_size == -1)
|
||||||
{
|
{
|
||||||
CopyGetData(&null_id, sizeof(int32), fp);
|
/* varlena field */
|
||||||
nulls[null_id] = 'n';
|
int32 varlena_size;
|
||||||
|
Pointer varlena_ptr;
|
||||||
|
|
||||||
|
CopyGetData(&varlena_size, sizeof(int32), fp);
|
||||||
|
if (CopyGetEof(fp))
|
||||||
|
elog(ERROR, "COPY BINARY: unexpected EOF");
|
||||||
|
if (varlena_size < (int32) sizeof(int32))
|
||||||
|
elog(ERROR, "COPY BINARY: bogus varlena length");
|
||||||
|
varlena_ptr = (Pointer) palloc(varlena_size);
|
||||||
|
VARATT_SIZEP(varlena_ptr) = varlena_size;
|
||||||
|
CopyGetData(VARDATA(varlena_ptr),
|
||||||
|
varlena_size - sizeof(int32),
|
||||||
|
fp);
|
||||||
|
if (CopyGetEof(fp))
|
||||||
|
elog(ERROR, "COPY BINARY: unexpected EOF");
|
||||||
|
values[i] = PointerGetDatum(varlena_ptr);
|
||||||
|
}
|
||||||
|
else if (!attr[i]->attbyval)
|
||||||
|
{
|
||||||
|
/* fixed-length pass-by-reference */
|
||||||
|
Pointer refval_ptr;
|
||||||
|
|
||||||
|
Assert(fld_size > 0);
|
||||||
|
refval_ptr = (Pointer) palloc(fld_size);
|
||||||
|
CopyGetData(refval_ptr, fld_size, fp);
|
||||||
|
if (CopyGetEof(fp))
|
||||||
|
elog(ERROR, "COPY BINARY: unexpected EOF");
|
||||||
|
values[i] = PointerGetDatum(refval_ptr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* pass-by-value */
|
||||||
|
Datum datumBuf;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We need this horsing around because we don't know
|
||||||
|
* how shorter data values are aligned within a Datum.
|
||||||
|
*/
|
||||||
|
Assert(fld_size > 0 && fld_size <= sizeof(Datum));
|
||||||
|
CopyGetData(&datumBuf, fld_size, fp);
|
||||||
|
if (CopyGetEof(fp))
|
||||||
|
elog(ERROR, "COPY BINARY: unexpected EOF");
|
||||||
|
values[i] = fetch_att(&datumBuf, true, fld_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
nulls[i] = ' ';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
string = (char *) palloc(len);
|
|
||||||
CopyGetData(string, len, fp);
|
|
||||||
|
|
||||||
ptr = string;
|
|
||||||
|
|
||||||
for (i = 0; i < attr_count; i++)
|
|
||||||
{
|
|
||||||
if (nulls[i] == 'n')
|
|
||||||
continue;
|
|
||||||
ptr = (char *) att_align((long) ptr, attr[i]->attlen, attr[i]->attalign);
|
|
||||||
values[i] = fetchatt(attr[i], ptr);
|
|
||||||
ptr = att_addlength(ptr, attr[i]->attlen, ptr);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (done)
|
if (done)
|
||||||
continue;
|
break;
|
||||||
|
|
||||||
tuple = heap_formtuple(tupDesc, values, nulls);
|
tuple = heap_formtuple(tupDesc, values, nulls);
|
||||||
if (oids)
|
|
||||||
|
if (oids && file_has_oids)
|
||||||
tuple->t_data->t_oid = loaded_oid;
|
tuple->t_data->t_oid = loaded_oid;
|
||||||
|
|
||||||
skip_tuple = false;
|
skip_tuple = false;
|
||||||
@ -796,25 +902,13 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp,
|
|||||||
ExecARInsertTriggers(rel, tuple);
|
ExecARInsertTriggers(rel, tuple);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (binary)
|
|
||||||
pfree(string);
|
|
||||||
|
|
||||||
for (i = 0; i < attr_count; i++)
|
for (i = 0; i < attr_count; i++)
|
||||||
{
|
{
|
||||||
if (!attr[i]->attbyval && nulls[i] != 'n')
|
if (!attr[i]->attbyval && nulls[i] != 'n')
|
||||||
{
|
pfree(DatumGetPointer(values[i]));
|
||||||
if (!binary)
|
|
||||||
pfree((void *) values[i]);
|
|
||||||
}
|
|
||||||
/* reset nulls[] array for next time */
|
|
||||||
nulls[i] = ' ';
|
|
||||||
}
|
}
|
||||||
|
|
||||||
heap_freetuple(tuple);
|
heap_freetuple(tuple);
|
||||||
tuples_read++;
|
|
||||||
|
|
||||||
if (!reading_to_eof && ntuples == tuples_read)
|
|
||||||
done = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -829,7 +923,6 @@ CopyFrom(Relation rel, bool binary, bool oids, FILE *fp,
|
|||||||
{
|
{
|
||||||
pfree(in_functions);
|
pfree(in_functions);
|
||||||
pfree(elements);
|
pfree(elements);
|
||||||
pfree(typmod);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ExecDropTupleTable(tupleTable, true);
|
ExecDropTupleTable(tupleTable, true);
|
||||||
@ -1099,26 +1192,3 @@ CopyAttributeOut(FILE *fp, char *server_string, char *delim)
|
|||||||
pfree(string_start); /* pfree pg_server_to_client result */
|
pfree(string_start); /* pfree pg_server_to_client result */
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Returns the number of tuples in a relation. Unfortunately, currently
|
|
||||||
* must do a scan of the entire relation to determine this.
|
|
||||||
*
|
|
||||||
* relation is expected to be an open relation descriptor.
|
|
||||||
*/
|
|
||||||
static int
|
|
||||||
CountTuples(Relation relation)
|
|
||||||
{
|
|
||||||
HeapScanDesc scandesc;
|
|
||||||
HeapTuple tuple;
|
|
||||||
|
|
||||||
int i;
|
|
||||||
|
|
||||||
scandesc = heap_beginscan(relation, 0, QuerySnapshot, 0, NULL);
|
|
||||||
|
|
||||||
i = 0;
|
|
||||||
while (HeapTupleIsValid(tuple = heap_getnext(scandesc, 0)))
|
|
||||||
i++;
|
|
||||||
heap_endscan(scandesc);
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user