mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-04 00:02:52 -05:00 
			
		
		
		
	Support multi-line headers in COPY FROM command.
The COPY FROM command now accepts a non-negative integer for the HEADER option, allowing multiple header lines to be skipped. This is useful when the input contains multi-line headers that should be ignored during data import.

Author: Shinya Kato <shinya11.kato@gmail.com>
Co-authored-by: Fujii Masao <masao.fujii@gmail.com>
Reviewed-by: Yugo Nagata <nagata@sraoss.co.jp>
Discussion: https://postgr.es/m/CAOzEurRPxfzbxqeOPF_AGnAUOYf=Wk0we+1LQomPNUNtyZGBZw@mail.gmail.com
This commit is contained in:
		
							parent
							
								
									fd7d7b7191
								
							
						
					
					
						commit
						bc2f348e87
					
				@ -37,7 +37,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
 | 
			
		||||
    DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
 | 
			
		||||
    NULL '<replaceable class="parameter">null_string</replaceable>'
 | 
			
		||||
    DEFAULT '<replaceable class="parameter">default_string</replaceable>'
 | 
			
		||||
    HEADER [ <replaceable class="parameter">boolean</replaceable> | MATCH ]
 | 
			
		||||
    HEADER [ <replaceable class="parameter">boolean</replaceable> | <replaceable class="parameter">integer</replaceable> | MATCH ]
 | 
			
		||||
    QUOTE '<replaceable class="parameter">quote_character</replaceable>'
 | 
			
		||||
    ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
 | 
			
		||||
    FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
 | 
			
		||||
@ -212,6 +212,15 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
 | 
			
		||||
    </listitem>
 | 
			
		||||
   </varlistentry>
 | 
			
		||||
 | 
			
		||||
   <varlistentry>
 | 
			
		||||
    <term><replaceable class="parameter">integer</replaceable></term>
 | 
			
		||||
    <listitem>
 | 
			
		||||
     <para>
 | 
			
		||||
      Specifies a non-negative integer value passed to the selected option.
 | 
			
		||||
     </para>
 | 
			
		||||
    </listitem>
 | 
			
		||||
   </varlistentry>
 | 
			
		||||
 | 
			
		||||
   <varlistentry>
 | 
			
		||||
    <term><literal>FORMAT</literal></term>
 | 
			
		||||
    <listitem>
 | 
			
		||||
@ -303,16 +312,25 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
 | 
			
		||||
    <term><literal>HEADER</literal></term>
 | 
			
		||||
    <listitem>
 | 
			
		||||
     <para>
 | 
			
		||||
      Specifies that the file contains a header line with the names of each
 | 
			
		||||
      column in the file.  On output, the first line contains the column
 | 
			
		||||
      names from the table.  On input, the first line is discarded when this
 | 
			
		||||
      option is set to <literal>true</literal> (or equivalent Boolean value).
 | 
			
		||||
      If this option is set to <literal>MATCH</literal>, the number and names
 | 
			
		||||
      of the columns in the header line must match the actual column names of
 | 
			
		||||
      the table, in order;  otherwise an error is raised.
 | 
			
		||||
      On output, if this option is set to <literal>true</literal>
 | 
			
		||||
      (or an equivalent Boolean value), the first line of the output will
 | 
			
		||||
      contain the column names from the table.
 | 
			
		||||
      Integer values <literal>0</literal> and <literal>1</literal> are
 | 
			
		||||
      accepted as Boolean values, but other integers are not allowed for
 | 
			
		||||
      <command>COPY TO</command> commands.
 | 
			
		||||
     </para>
 | 
			
		||||
     <para>
 | 
			
		||||
      On input, if this option is set to <literal>true</literal>
 | 
			
		||||
      (or an equivalent Boolean value), the first line of the input is
 | 
			
		||||
      discarded.  If set to a non-negative integer, that number of
 | 
			
		||||
      lines are discarded.  If set to <literal>MATCH</literal>, the first line
 | 
			
		||||
      is discarded, and it must contain column names that exactly match the
 | 
			
		||||
      table's columns, in both number and order; otherwise, an error is raised.
 | 
			
		||||
      The <literal>MATCH</literal> value is only valid for
 | 
			
		||||
      <command>COPY FROM</command> commands.
 | 
			
		||||
     </para>
 | 
			
		||||
     <para>
 | 
			
		||||
      This option is not allowed when using <literal>binary</literal> format.
 | 
			
		||||
      The <literal>MATCH</literal> option is only valid for <command>COPY
 | 
			
		||||
      FROM</command> commands.
 | 
			
		||||
     </para>
 | 
			
		||||
    </listitem>
 | 
			
		||||
   </varlistentry>
 | 
			
		||||
 | 
			
		||||
@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Extract a CopyHeaderChoice value from a DefElem.  This is like
 | 
			
		||||
 * defGetBoolean() but also accepts the special value "match".
 | 
			
		||||
 * Extract the CopyFormatOptions.header_line value from a DefElem.
 | 
			
		||||
 *
 | 
			
		||||
 * Parses the HEADER option for COPY, which can be a boolean, a non-negative
 | 
			
		||||
 * integer (number of lines to skip), or the special value "match".
 | 
			
		||||
 */
 | 
			
		||||
static CopyHeaderChoice
 | 
			
		||||
defGetCopyHeaderChoice(DefElem *def, bool is_from)
 | 
			
		||||
static int
 | 
			
		||||
defGetCopyHeaderOption(DefElem *def, bool is_from)
 | 
			
		||||
{
 | 
			
		||||
	/*
 | 
			
		||||
	 * If no parameter value given, assume "true" is meant.
 | 
			
		||||
@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
 | 
			
		||||
		return COPY_HEADER_TRUE;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * Allow 0, 1, "true", "false", "on", "off", or "match".
 | 
			
		||||
	 * Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or
 | 
			
		||||
	 * "match".
 | 
			
		||||
	 */
 | 
			
		||||
	switch (nodeTag(def->arg))
 | 
			
		||||
	{
 | 
			
		||||
		case T_Integer:
 | 
			
		||||
			switch (intVal(def->arg))
 | 
			
		||||
			{
 | 
			
		||||
				case 0:
 | 
			
		||||
					return COPY_HEADER_FALSE;
 | 
			
		||||
				case 1:
 | 
			
		||||
					return COPY_HEADER_TRUE;
 | 
			
		||||
				default:
 | 
			
		||||
					/* otherwise, error out below */
 | 
			
		||||
					break;
 | 
			
		||||
				int			ival = intVal(def->arg);
 | 
			
		||||
 | 
			
		||||
				if (ival < 0)
 | 
			
		||||
					ereport(ERROR,
 | 
			
		||||
							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
			
		||||
							 errmsg("a negative integer value cannot be "
 | 
			
		||||
									"specified for %s", def->defname)));
 | 
			
		||||
 | 
			
		||||
				if (!is_from && ival > 1)
 | 
			
		||||
					ereport(ERROR,
 | 
			
		||||
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 | 
			
		||||
							 errmsg("cannot use multi-line header in COPY TO")));
 | 
			
		||||
 | 
			
		||||
				return ival;
 | 
			
		||||
			}
 | 
			
		||||
			break;
 | 
			
		||||
		default:
 | 
			
		||||
@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
 | 
			
		||||
	}
 | 
			
		||||
	ereport(ERROR,
 | 
			
		||||
			(errcode(ERRCODE_SYNTAX_ERROR),
 | 
			
		||||
			 errmsg("%s requires a Boolean value or \"match\"",
 | 
			
		||||
			 errmsg("%s requires a Boolean value, a non-negative integer, "
 | 
			
		||||
					"or the string \"match\"",
 | 
			
		||||
					def->defname)));
 | 
			
		||||
	return COPY_HEADER_FALSE;	/* keep compiler quiet */
 | 
			
		||||
}
 | 
			
		||||
@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate,
 | 
			
		||||
			if (header_specified)
 | 
			
		||||
				errorConflictingDefElem(defel, pstate);
 | 
			
		||||
			header_specified = true;
 | 
			
		||||
			opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
 | 
			
		||||
			opts_out->header_line = defGetCopyHeaderOption(defel, is_from);
 | 
			
		||||
		}
 | 
			
		||||
		else if (strcmp(defel->defname, "quote") == 0)
 | 
			
		||||
		{
 | 
			
		||||
@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate,
 | 
			
		||||
				 errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
 | 
			
		||||
 | 
			
		||||
	/* Check header */
 | 
			
		||||
	if (opts_out->binary && opts_out->header_line)
 | 
			
		||||
	if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
 | 
			
		||||
		ereport(ERROR,
 | 
			
		||||
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 | 
			
		||||
		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
 | 
			
		||||
 | 
			
		||||
@ -771,21 +771,30 @@ static pg_attribute_always_inline bool
 | 
			
		||||
NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
 | 
			
		||||
{
 | 
			
		||||
	int			fldct;
 | 
			
		||||
	bool		done;
 | 
			
		||||
	bool		done = false;
 | 
			
		||||
 | 
			
		||||
	/* only available for text or csv input */
 | 
			
		||||
	Assert(!cstate->opts.binary);
 | 
			
		||||
 | 
			
		||||
	/* on input check that the header line is correct if needed */
 | 
			
		||||
	if (cstate->cur_lineno == 0 && cstate->opts.header_line)
 | 
			
		||||
	if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
 | 
			
		||||
	{
 | 
			
		||||
		ListCell   *cur;
 | 
			
		||||
		TupleDesc	tupDesc;
 | 
			
		||||
		int			lines_to_skip = cstate->opts.header_line;
 | 
			
		||||
 | 
			
		||||
		/* If set to "match", one header line is skipped */
 | 
			
		||||
		if (cstate->opts.header_line == COPY_HEADER_MATCH)
 | 
			
		||||
			lines_to_skip = 1;
 | 
			
		||||
 | 
			
		||||
		tupDesc = RelationGetDescr(cstate->rel);
 | 
			
		||||
 | 
			
		||||
		cstate->cur_lineno++;
 | 
			
		||||
		done = CopyReadLine(cstate, is_csv);
 | 
			
		||||
		for (int i = 0; i < lines_to_skip; i++)
 | 
			
		||||
		{
 | 
			
		||||
			cstate->cur_lineno++;
 | 
			
		||||
			if ((done = CopyReadLine(cstate, is_csv)))
 | 
			
		||||
				break;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if (cstate->opts.header_line == COPY_HEADER_MATCH)
 | 
			
		||||
		{
 | 
			
		||||
 | 
			
		||||
@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
 | 
			
		||||
														  cstate->file_encoding);
 | 
			
		||||
 | 
			
		||||
	/* if a header has been requested send the line */
 | 
			
		||||
	if (cstate->opts.header_line)
 | 
			
		||||
	if (cstate->opts.header_line == COPY_HEADER_TRUE)
 | 
			
		||||
	{
 | 
			
		||||
		ListCell   *cur;
 | 
			
		||||
		bool		hdr_delim = false;
 | 
			
		||||
 | 
			
		||||
@ -20,15 +20,12 @@
 | 
			
		||||
#include "tcop/dest.h"
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Represents whether a header line should be present, and whether it must
 | 
			
		||||
 * match the actual names (which implies "true").
 | 
			
		||||
 * Represents whether a header line must match the actual names
 | 
			
		||||
 * (which implies "true"), and whether it should be present.
 | 
			
		||||
 */
 | 
			
		||||
typedef enum CopyHeaderChoice
 | 
			
		||||
{
 | 
			
		||||
	COPY_HEADER_FALSE = 0,
 | 
			
		||||
	COPY_HEADER_TRUE,
 | 
			
		||||
	COPY_HEADER_MATCH,
 | 
			
		||||
} CopyHeaderChoice;
 | 
			
		||||
#define COPY_HEADER_MATCH	-1
 | 
			
		||||
#define COPY_HEADER_FALSE	0
 | 
			
		||||
#define COPY_HEADER_TRUE	1
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Represents where to save input processing errors.  More values to be added
 | 
			
		||||
@ -64,7 +61,8 @@ typedef struct CopyFormatOptions
 | 
			
		||||
	bool		binary;			/* binary format? */
 | 
			
		||||
	bool		freeze;			/* freeze rows on loading? */
 | 
			
		||||
	bool		csv_mode;		/* Comma Separated Value format? */
 | 
			
		||||
	CopyHeaderChoice header_line;	/* header line? */
 | 
			
		||||
	int			header_line;	/* number of lines to skip or COPY_HEADER_XXX
 | 
			
		||||
								 * value (see the above) */
 | 
			
		||||
	char	   *null_print;		/* NULL marker string (server encoding!) */
 | 
			
		||||
	int			null_print_len; /* length of same */
 | 
			
		||||
	char	   *null_print_client;	/* same converted to file encoding */
 | 
			
		||||
 | 
			
		||||
@ -81,6 +81,29 @@ copy copytest4 to stdout (header);
 | 
			
		||||
c1	colname with tab: \t
 | 
			
		||||
1	a
 | 
			
		||||
2	b
 | 
			
		||||
-- test multi-line header line feature
 | 
			
		||||
create temp table copytest5 (c1 int);
 | 
			
		||||
copy copytest5 from stdin (format csv, header 2);
 | 
			
		||||
copy copytest5 to stdout (header);
 | 
			
		||||
c1
 | 
			
		||||
1
 | 
			
		||||
2
 | 
			
		||||
truncate copytest5;
 | 
			
		||||
copy copytest5 from stdin (format csv, header 4);
 | 
			
		||||
select count(*) from copytest5;
 | 
			
		||||
 count 
 | 
			
		||||
-------
 | 
			
		||||
     0
 | 
			
		||||
(1 row)
 | 
			
		||||
 | 
			
		||||
truncate copytest5;
 | 
			
		||||
copy copytest5 from stdin (format csv, header 5);
 | 
			
		||||
select count(*) from copytest5;
 | 
			
		||||
 count 
 | 
			
		||||
-------
 | 
			
		||||
     0
 | 
			
		||||
(1 row)
 | 
			
		||||
 | 
			
		||||
-- test copy from with a partitioned table
 | 
			
		||||
create table parted_copytest (
 | 
			
		||||
	a int,
 | 
			
		||||
@ -224,7 +247,7 @@ alter table header_copytest add column c text;
 | 
			
		||||
copy header_copytest to stdout with (header match);
 | 
			
		||||
ERROR:  cannot use "match" with HEADER in COPY TO
 | 
			
		||||
copy header_copytest from stdin with (header wrong_choice);
 | 
			
		||||
ERROR:  header requires a Boolean value or "match"
 | 
			
		||||
ERROR:  header requires a Boolean value, a non-negative integer, or the string "match"
 | 
			
		||||
-- works
 | 
			
		||||
copy header_copytest from stdin with (header match);
 | 
			
		||||
copy header_copytest (c, a, b) from stdin with (header match);
 | 
			
		||||
 | 
			
		||||
@ -132,6 +132,12 @@ COPY x from stdin with (reject_limit 1);
 | 
			
		||||
ERROR:  COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
 | 
			
		||||
COPY x from stdin with (on_error ignore, reject_limit 0);
 | 
			
		||||
ERROR:  REJECT_LIMIT (0) must be greater than zero
 | 
			
		||||
COPY x from stdin with (header -1);
 | 
			
		||||
ERROR:  a negative integer value cannot be specified for header
 | 
			
		||||
COPY x from stdin with (header 2.5);
 | 
			
		||||
ERROR:  header requires a Boolean value, a non-negative integer, or the string "match"
 | 
			
		||||
COPY x to stdout with (header 2);
 | 
			
		||||
ERROR:  cannot use multi-line header in COPY TO
 | 
			
		||||
-- too many columns in column list: should fail
 | 
			
		||||
COPY x (a, b, c, d, e, d, c) from stdin;
 | 
			
		||||
ERROR:  column "d" specified more than once
 | 
			
		||||
 | 
			
		||||
@ -94,6 +94,36 @@ this is just a line full of junk that would error out if parsed
 | 
			
		||||
 | 
			
		||||
copy copytest4 to stdout (header);
 | 
			
		||||
 | 
			
		||||
-- test multi-line header line feature
 | 
			
		||||
 | 
			
		||||
create temp table copytest5 (c1 int);
 | 
			
		||||
 | 
			
		||||
copy copytest5 from stdin (format csv, header 2);
 | 
			
		||||
this is a first header line.
 | 
			
		||||
this is a second header line.
 | 
			
		||||
1
 | 
			
		||||
2
 | 
			
		||||
\.
 | 
			
		||||
copy copytest5 to stdout (header);
 | 
			
		||||
 | 
			
		||||
truncate copytest5;
 | 
			
		||||
copy copytest5 from stdin (format csv, header 4);
 | 
			
		||||
this is a first header line.
 | 
			
		||||
this is a second header line.
 | 
			
		||||
1
 | 
			
		||||
2
 | 
			
		||||
\.
 | 
			
		||||
select count(*) from copytest5;
 | 
			
		||||
 | 
			
		||||
truncate copytest5;
 | 
			
		||||
copy copytest5 from stdin (format csv, header 5);
 | 
			
		||||
this is a first header line.
 | 
			
		||||
this is a second header line.
 | 
			
		||||
1
 | 
			
		||||
2
 | 
			
		||||
\.
 | 
			
		||||
select count(*) from copytest5;
 | 
			
		||||
 | 
			
		||||
-- test copy from with a partitioned table
 | 
			
		||||
create table parted_copytest (
 | 
			
		||||
	a int,
 | 
			
		||||
 | 
			
		||||
@ -90,6 +90,9 @@ COPY x to stdout (format BINARY, on_error unsupported);
 | 
			
		||||
COPY x from stdin (log_verbosity unsupported);
 | 
			
		||||
COPY x from stdin with (reject_limit 1);
 | 
			
		||||
COPY x from stdin with (on_error ignore, reject_limit 0);
 | 
			
		||||
COPY x from stdin with (header -1);
 | 
			
		||||
COPY x from stdin with (header 2.5);
 | 
			
		||||
COPY x to stdout with (header 2);
 | 
			
		||||
 | 
			
		||||
-- too many columns in column list: should fail
 | 
			
		||||
COPY x (a, b, c, d, e, d, c) from stdin;
 | 
			
		||||
 | 
			
		||||
@ -521,7 +521,6 @@ CopyFormatOptions
 | 
			
		||||
CopyFromRoutine
 | 
			
		||||
CopyFromState
 | 
			
		||||
CopyFromStateData
 | 
			
		||||
CopyHeaderChoice
 | 
			
		||||
CopyInsertMethod
 | 
			
		||||
CopyLogVerbosityChoice
 | 
			
		||||
CopyMethod
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user