From e25beadf7cdbe3864806a8fa26e5c1dd9adc1c62 Mon Sep 17 00:00:00 2001 From: Shinya Kato Date: Thu, 15 Jan 2026 16:19:30 +0900 Subject: [PATCH v3] file_fdw: Support multi-line HEADER option. Commit bc2f348 introduced multi-line HEADER support for COPY. This commit extends support for this option to file_fdw. Similar to the HEADER option in COPY, this option allows multiple header lines to be skipped. Since the CREATE/ALTER FOREIGN TABLE commands require foreign table options to be single-quoted, this commit updates defGetCopyHeaderOption() to also handle string values. Author: Shinya Kato Reviewed-by: Fujii Masao Reviewed-by: songjinzhou Reviewed-by: Japin Li Reviewed-by: Chao Li Discussion: https://postgr.es/m/CAOzEurT+iwC47VHPMS+uJ4WSzvOLPsZ2F2_wopm8M7O+CZa3Xw@mail.gmail.com --- contrib/file_fdw/data/multiline_header.csv | 4 + contrib/file_fdw/expected/file_fdw.out | 29 ++++- contrib/file_fdw/sql/file_fdw.sql | 13 ++ doc/src/sgml/file-fdw.sgml | 9 +- src/backend/commands/copy.c | 134 ++++++++++++--------- src/test/regress/expected/copy.out | 18 +++ src/test/regress/expected/copy2.out | 6 + src/test/regress/sql/copy.sql | 15 +++ src/test/regress/sql/copy2.sql | 3 + 9 files changed, 170 insertions(+), 61 deletions(-) create mode 100644 contrib/file_fdw/data/multiline_header.csv diff --git a/contrib/file_fdw/data/multiline_header.csv b/contrib/file_fdw/data/multiline_header.csv new file mode 100644 index 00000000000..0d5e482a1e4 --- /dev/null +++ b/contrib/file_fdw/data/multiline_header.csv @@ -0,0 +1,4 @@ +first header line +second header line +1,alpha +2,beta diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out index 5121e27dce5..bd0fe8b0c2e 100644 --- a/contrib/file_fdw/expected/file_fdw.out +++ b/contrib/file_fdw/expected/file_fdw.out @@ -104,6 +104,12 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (reject_limit '1'); ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (on_error 'ignore', reject_limit '0'); -- ERROR ERROR: REJECT_LIMIT (0) must be greater than zero +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header '-1'); -- ERROR +ERROR: a negative integer value cannot be specified for header +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header '2.5'); -- ERROR +ERROR: header requires a Boolean value, a non-negative integer, or the string "match" +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header 'unsupported'); -- ERROR +ERROR: header requires a Boolean value, a non-negative integer, or the string "match" CREATE FOREIGN TABLE tbl () SERVER file_server; -- ERROR ERROR: either filename or program is required for file_fdw foreign tables \set filename :abs_srcdir '/data/agg.data' @@ -142,6 +148,25 @@ OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match'); SELECT * FROM header_doesnt_match; -- ERROR ERROR: column name mismatch in header line field 1: got "1", expected "a" CONTEXT: COPY header_doesnt_match, line 1: "1,foo" +-- test multi-line header +\set filename :abs_srcdir '/data/multiline_header.csv' +CREATE FOREIGN TABLE multi_header (a int, b text) SERVER file_server +OPTIONS (format 'csv', filename :'filename', header '2'); +SELECT * FROM multi_header ORDER BY a; + a | b +---+------- + 1 | alpha + 2 | beta +(2 rows) + +CREATE FOREIGN TABLE multi_header_skip (a int, b text) SERVER file_server +OPTIONS (format 'csv', filename :'filename', header '5'); +SELECT count(*) FROM multi_header_skip; + count +------- + 0 +(1 row) + -- per-column options tests \set filename :abs_srcdir '/data/text.csv' CREATE FOREIGN TABLE text_csv ( @@ -543,7 +568,7 @@ SET ROLE regress_file_fdw_superuser; -- cleanup RESET ROLE; DROP EXTENSION file_fdw CASCADE; -NOTICE: drop cascades to 9 other objects +NOTICE: drop cascades to 11 other objects DETAIL: drop cascades to server file_server drop cascades to user mapping for regress_file_fdw_superuser on server file_server drop cascades to user mapping for regress_no_priv_user on server file_server @@ -552,5 +577,7 @@ drop cascades to foreign table agg_csv drop cascades to foreign table agg_bad drop cascades to foreign table header_match drop cascades to foreign table header_doesnt_match +drop cascades to foreign table multi_header +drop cascades to foreign table multi_header_skip drop cascades to foreign table text_csv DROP ROLE regress_file_fdw_superuser, regress_file_fdw_user, regress_no_priv_user; diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql index 1a397ad4bd1..408affcf87f 100644 --- a/contrib/file_fdw/sql/file_fdw.sql +++ b/contrib/file_fdw/sql/file_fdw.sql @@ -84,6 +84,9 @@ CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (format 'binary', on_erro CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (log_verbosity 'unsupported'); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (reject_limit '1'); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (on_error 'ignore', reject_limit '0'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header '-1'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header '2.5'); -- ERROR +CREATE FOREIGN TABLE tbl () SERVER file_server OPTIONS (header 'unsupported'); -- ERROR CREATE FOREIGN TABLE tbl () SERVER file_server; -- ERROR \set filename :abs_srcdir '/data/agg.data' @@ -119,6 +122,16 @@ CREATE FOREIGN TABLE header_doesnt_match (a int, foo text) SERVER file_server OPTIONS (format 'csv', filename :'filename', delimiter ',', header 'match'); SELECT * FROM header_doesnt_match; -- ERROR +-- test multi-line header +\set filename :abs_srcdir '/data/multiline_header.csv' +CREATE FOREIGN TABLE multi_header (a int, b text) SERVER file_server +OPTIONS (format 'csv', filename :'filename', header '2'); +SELECT * FROM multi_header ORDER BY a; + +CREATE FOREIGN TABLE multi_header_skip (a int, b text) SERVER file_server +OPTIONS (format 'csv', filename :'filename', header '5'); +SELECT count(*) FROM multi_header_skip; + -- per-column options tests \set filename :abs_srcdir '/data/text.csv' CREATE FOREIGN TABLE text_csv ( diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml index 882d9a76d21..ff9435720c2 100644 --- a/doc/src/sgml/file-fdw.sgml +++ b/doc/src/sgml/file-fdw.sgml @@ -65,7 +65,7 @@ - Specifies whether the data has a header line, + Specifies whether to skip a header line, or how many header lines to skip, the same as COPY's HEADER option. @@ -166,9 +166,10 @@ Note that while COPY allows options such as HEADER to be specified without a corresponding value, the foreign table option - syntax requires a value to be present in all cases. To activate - COPY options typically written without a value, you can pass - the value TRUE, since all such options are Booleans. + syntax requires a value to be present in all cases. Use the appropriate + explicit value (for example TRUE, match, + or a non-negative integer line count for HEADER) to enable + the desired behavior. diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 9c51384ab92..092d7e57bce 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -28,6 +28,7 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#include "nodes/miscnodes.h" #include "optimizer/optimizer.h" #include "parser/parse_coerce.h" #include "parser/parse_collate.h" @@ -364,6 +365,76 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt, table_close(rel, NoLock); } +/* + * Validate the HEADER option integer for COPY and return the validated + * non-negative line count (number of header lines to skip). + */ +static int +defCheckCopyHeaderInteger(DefElem *def, int header, bool is_from) +{ + if (header < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("a negative integer value cannot be " + "specified for %s", def->defname))); + + if (!is_from && header > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use multi-line header in COPY TO"))); + + return header; +} + +/* + * Validate the HEADER option string for COPY and return either a + * non-negative line count (number of header lines to skip) or one of + * the COPY_HEADER_* sentinel values + */ +static int +defCheckCopyHeaderString(DefElem *def, char *header, bool is_from) +{ + int ival; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + /* + * The set of strings accepted here should match up with the + * grammar's opt_boolean_or_string production. + */ + if (pg_strcasecmp(header, "true") == 0) + return COPY_HEADER_TRUE; + if (pg_strcasecmp(header, "on") == 0) + return COPY_HEADER_TRUE; + if (pg_strcasecmp(header, "false") == 0) + return COPY_HEADER_FALSE; + if (pg_strcasecmp(header, "off") == 0) + return COPY_HEADER_FALSE; + if (pg_strcasecmp(header, "match") == 0) + { + if (!is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use \"%s\" with HEADER in COPY TO", + header))); + return COPY_HEADER_MATCH; + } + + /* Check if the header is a valid integer */ + ival = pg_strtoint32_safe(header, (Node *)&escontext); + if (escontext.error_occurred) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("%s requires a Boolean value, a non-negative integer, " + "or the string \"match\"", + def->defname))); + + /* + * If it's not a recognized keyword, treat it as a numeric value to + * preserve the existing error reporting for invalid integers. + */ + return defCheckCopyHeaderInteger(def, ival, is_from); +} + /* * Extract the CopyFormatOptions.header_line value from a DefElem. * @@ -380,63 +451,14 @@ defGetCopyHeaderOption(DefElem *def, bool is_from) return COPY_HEADER_TRUE; /* - * Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or - * "match". + * Allow 0 (no header lines, same as "false"/"off"), 1 (one header line, + * same as "true"/"on"), a non-negative integer line count (also as a + * string, to support file_fdw options), or "match". */ - switch (nodeTag(def->arg)) - { - case T_Integer: - { - int ival = intVal(def->arg); - - if (ival < 0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("a negative integer value cannot be " - "specified for %s", def->defname))); - - if (!is_from && ival > 1) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot use multi-line header in COPY TO"))); - - return ival; - } - break; - default: - { - char *sval = defGetString(def); - - /* - * The set of strings accepted here should match up with the - * grammar's opt_boolean_or_string production. - */ - if (pg_strcasecmp(sval, "true") == 0) - return COPY_HEADER_TRUE; - if (pg_strcasecmp(sval, "false") == 0) - return COPY_HEADER_FALSE; - if (pg_strcasecmp(sval, "on") == 0) - return COPY_HEADER_TRUE; - if (pg_strcasecmp(sval, "off") == 0) - return COPY_HEADER_FALSE; - if (pg_strcasecmp(sval, "match") == 0) - { - if (!is_from) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot use \"%s\" with HEADER in COPY TO", - sval))); - return COPY_HEADER_MATCH; - } - } - break; - } - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("%s requires a Boolean value, a non-negative integer, " - "or the string \"match\"", - def->defname))); - return COPY_HEADER_FALSE; /* keep compiler quiet */ + if (IsA(def->arg, Integer)) + return defCheckCopyHeaderInteger(def, intVal(def->arg), is_from); + else + return defCheckCopyHeaderString(def, defGetString(def), is_from); } /* diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index 24e0f472f14..5176df15e24 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -104,6 +104,24 @@ select count(*) from copytest5; 0 (1 row) +-- test header line feature (given as strings) +truncate copytest5; +copy copytest5 from stdin (format csv, header '0'); +select * from copytest5 order by c1; + c1 +---- + 1 + 2 +(2 rows) + +truncate copytest5; +copy copytest5 from stdin (format csv, header '1'); +select * from copytest5 order by c1; + c1 +---- + 2 +(1 row) + -- test copy from with a partitioned table create table parted_copytest ( a int, diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index f3fdce23459..00170c6b414 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -138,6 +138,12 @@ COPY x from stdin with (header 2.5); ERROR: header requires a Boolean value, a non-negative integer, or the string "match" COPY x to stdout with (header 2); ERROR: cannot use multi-line header in COPY TO +COPY x to stdout with (header '-1'); +ERROR: a negative integer value cannot be specified for header +COPY x from stdin with (header '2.5'); +ERROR: header requires a Boolean value, a non-negative integer, or the string "match" +COPY x to stdout with (header '2'); +ERROR: cannot use multi-line header in COPY TO -- too many columns in column list: should fail COPY x (a, b, c, d, e, d, c) from stdin; ERROR: column "d" specified more than once diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index 676a8b342b5..65cbdaf7f3e 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -124,6 +124,21 @@ this is a second header line. \. select count(*) from copytest5; +-- test header line feature (given as strings) +truncate copytest5; +copy copytest5 from stdin (format csv, header '0'); +1 +2 +\. +select * from copytest5 order by c1; + +truncate copytest5; +copy copytest5 from stdin (format csv, header '1'); +1 +2 +\. +select * from copytest5 order by c1; + -- test copy from with a partitioned table create table parted_copytest ( a int, diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index cef45868db5..66435167500 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -93,6 +93,9 @@ COPY x from stdin with (on_error ignore, reject_limit 0); COPY x from stdin with (header -1); COPY x from stdin with (header 2.5); COPY x to stdout with (header 2); +COPY x to stdout with (header '-1'); +COPY x from stdin with (header '2.5'); +COPY x to stdout with (header '2'); -- too many columns in column list: should fail COPY x (a, b, c, d, e, d, c) from stdin; -- 2.47.3