From 3ff9058b29e5410a976c1013484f55df27f132a5 Mon Sep 17 00:00:00 2001 From: jian he Date: Mon, 10 Nov 2025 08:40:26 +0800 Subject: [PATCH v21 2/3] json format for COPY TO JSON format is only supported with the COPY TO operation. It is incompatible with options such as HEADER, DEFAULT, NULL, DELIMITER, and several others. This has been thoroughly tested in src/test/regress/sql/copy.sql The CopyFormat enum was originally contributed by Joel Jacobson joel@compiler.org, later refactored by Jian He to address various issues, and further adapted by Junwang Zhao to support the newly introduced CopyToRoutine struct (commit 2e4127b6d2). Author: Joe Conway Reviewed-by: "Andrey M. Borodin" , Reviewed-by: Dean Rasheed , Reviewed-by: Daniel Verite , Reviewed-by: Andrew Dunstan , Reviewed-by: Davin Shearer , Reviewed-by: Masahiko Sawada , Reviewed-by: Alvaro Herrera discussion: https://postgr.es/m/CALvfUkBxTYy5uWPFVwpk_7ii2zgT07t3d-yR_cy4sfrrLU%3Dkcg%40mail.gmail.com discussion: https://postgr.es/m/6a04628d-0d53-41d9-9e35-5a8dc302c34c@joeconway.com --- doc/src/sgml/ref/copy.sgml | 13 +++-- src/backend/commands/copy.c | 72 +++++++++++++++++++++------- src/backend/commands/copyto.c | 76 ++++++++++++++++++++++++++---- src/backend/parser/gram.y | 8 ++++ src/backend/utils/adt/json.c | 5 +- src/bin/psql/tab-complete.in.c | 2 +- src/include/commands/copy.h | 1 + src/include/utils/json.h | 2 + src/test/regress/expected/copy.out | 76 ++++++++++++++++++++++++++++++ src/test/regress/sql/copy.sql | 47 ++++++++++++++++++ 10 files changed, 268 insertions(+), 34 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 53b0ea8f573..320f5f1edd4 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -228,10 +228,15 @@ COPY { table_name [ ( text, csv (Comma Separated Values), + json (JavaScript Object Notation), or binary. The default is text. See below for details. + + The json option is allowed only in + COPY TO. + @@ -266,7 +271,7 @@ COPY { table_name [ ( CSV format. This must be a single one-byte character. - This option is not allowed when using binary format. + This option is not allowed when using binary or json format. @@ -280,7 +285,7 @@ COPY { table_name [ ( CSV format. You might prefer an empty string even in text format for cases where you don't want to distinguish nulls from empty strings. - This option is not allowed when using binary format. + This option is not allowed when using binary or json format. @@ -303,7 +308,7 @@ COPY { table_name [ ( COPY FROM, and only when - not using binary format. + not using binary or json format. @@ -330,7 +335,7 @@ COPY { table_name [ ( COPY FROM commands. - This option is not allowed when using binary format. + This option is not allowed when using binary or json format. diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index d674ada98e4..0ec9b22d20f 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -585,6 +585,8 @@ ProcessCopyOptions(ParseState *pstate, opts_out->format = COPY_FORMAT_CSV; else if (strcmp(fmt, "binary") == 0) opts_out->format = COPY_FORMAT_BINARY; + else if (strcmp(fmt, "json") == 0) + opts_out->format = COPY_FORMAT_JSON; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), @@ -744,21 +746,42 @@ ProcessCopyOptions(ParseState *pstate, * Check for incompatible options (must do these three before inserting * defaults) */ - if (opts_out->format == COPY_FORMAT_BINARY && opts_out->delim) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ - errmsg("cannot specify %s in BINARY mode", "DELIMITER"))); + if (opts_out->delim) + { + if (opts_out->format == COPY_FORMAT_BINARY) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ + errmsg("cannot specify %s in BINARY mode", "DELIMITER")); + else if (opts_out->format == COPY_FORMAT_JSON) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify %s in JSON mode", "DELIMITER")); + } - if (opts_out->format == COPY_FORMAT_BINARY && opts_out->null_print) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("cannot specify %s in BINARY mode", "NULL"))); + if (opts_out->null_print) + { + if (opts_out->format == COPY_FORMAT_BINARY) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify %s in BINARY mode", "NULL")); + else if (opts_out->format == COPY_FORMAT_JSON) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify %s in JSON mode", "NULL")); + } - if (opts_out->format == COPY_FORMAT_BINARY && opts_out->default_print) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("cannot specify %s in BINARY mode", "DEFAULT"))); + if (opts_out->default_print) + { + if (opts_out->format == COPY_FORMAT_BINARY) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify %s in BINARY mode", "DEFAULT")); + else if (opts_out->format == COPY_FORMAT_JSON) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot specify %s in JSON mode", "DEFAULT")); + } /* Set defaults for omitted options */ if (!opts_out->delim) @@ -824,11 +847,18 @@ ProcessCopyOptions(ParseState *pstate, errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim))); /* Check header */ - if (opts_out->format == COPY_FORMAT_BINARY && opts_out->header_line != COPY_HEADER_FALSE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ - errmsg("cannot specify %s in BINARY mode", "HEADER"))); + if (opts_out->header_line != COPY_HEADER_FALSE) + { + if (opts_out->format == COPY_FORMAT_BINARY) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ + errmsg("cannot specify %s in BINARY mode", "HEADER")); + else if(opts_out->format == COPY_FORMAT_JSON) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot specify %s in JSON mode", "HEADER")); + } /* Check quote */ if (opts_out->format != COPY_FORMAT_CSV && opts_out->quote != NULL) @@ -932,6 +962,12 @@ ProcessCopyOptions(ParseState *pstate, errmsg("COPY %s cannot be used with %s", "FREEZE", "COPY TO"))); + /* Check json format */ + if (opts_out->format == COPY_FORMAT_JSON && is_from) + ereport(ERROR, + errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("COPY %s mode cannot be used with %s", "json", "COPY FROM")); + if (opts_out->default_print) { if (!is_from) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index c97f0460b3e..accf34e1a60 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -26,6 +26,7 @@ #include "executor/execdesc.h" #include "executor/executor.h" #include "executor/tuptable.h" +#include "funcapi.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" #include "mb/pg_wchar.h" @@ -33,6 +34,7 @@ #include "pgstat.h" #include "storage/fd.h" #include "tcop/tcopprot.h" +#include "utils/json.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -130,6 +132,7 @@ static void CopyToCSVOneRow(CopyToState cstate, TupleTableSlot *slot); static void CopyToTextLikeOneRow(CopyToState cstate, TupleTableSlot *slot, bool is_csv); static void CopyToTextLikeEnd(CopyToState cstate); +static void CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot); static void CopyToBinaryStart(CopyToState cstate, TupleDesc tupDesc); static void CopyToBinaryOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo); static void CopyToBinaryOneRow(CopyToState cstate, TupleTableSlot *slot); @@ -149,7 +152,7 @@ static void CopySendInt16(CopyToState cstate, int16 val); /* * COPY TO routines for built-in formats. * - * CSV and text formats share the same TextLike routines except for the + * CSV and text, json formats share the same TextLike routines except for the * one-row callback. */ @@ -169,6 +172,14 @@ static const CopyToRoutine CopyToRoutineCSV = { .CopyToEnd = CopyToTextLikeEnd, }; +/* json format */ +static const CopyToRoutine CopyToRoutineJson = { + .CopyToStart = CopyToTextLikeStart, + .CopyToOutFunc = CopyToTextLikeOutFunc, + .CopyToOneRow = CopyToJsonOneRow, + .CopyToEnd = CopyToTextLikeEnd, +}; + /* binary format */ static const CopyToRoutine CopyToRoutineBinary = { .CopyToStart = CopyToBinaryStart, @@ -185,12 +196,14 @@ CopyToGetRoutine(const CopyFormatOptions *opts) return &CopyToRoutineCSV; else if (opts->format == COPY_FORMAT_BINARY) return &CopyToRoutineBinary; + else if (opts->format == COPY_FORMAT_JSON) + return &CopyToRoutineJson; /* default is text */ return &CopyToRoutineText; } -/* Implementation of the start callback for text and CSV formats */ +/* Implementation of the start callback for text, CSV, and json formats */ static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) { @@ -209,6 +222,8 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) ListCell *cur; bool hdr_delim = false; + Assert(cstate->opts.format != COPY_FORMAT_JSON); + foreach(cur, cstate->attnumlist) { int attnum = lfirst_int(cur); @@ -231,7 +246,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) } /* - * Implementation of the outfunc callback for text and CSV formats. Assign + * Implementation of the outfunc callback for text, CSV, and json formats. Assign * the output function data to the given *finfo. */ static void @@ -304,13 +319,46 @@ CopyToTextLikeOneRow(CopyToState cstate, CopySendTextLikeEndOfRow(cstate); } -/* Implementation of the end callback for text and CSV formats */ +/* Implementation of the end callback for text, CSV, and json formats */ static void CopyToTextLikeEnd(CopyToState cstate) { /* Nothing to do here */ } +/* Implementation of per-row callback for json format */ +static void +CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot) +{ + Datum rowdata; + StringInfo result; + + /* + * If COPY TO source data come from query rather than plain table, we need + * copy CopyToState->QueryDesc->TupleDesc to slot->tts_tupleDescriptor. + * This is necessary because the slot's TupleDesc may change during query + * execution, and we depend on it when calling composite_to_json. + */ + if (!cstate->rel) + { + memcpy(TupleDescAttr(slot->tts_tupleDescriptor, 0), + TupleDescAttr(cstate->queryDesc->tupDesc, 0), + cstate->queryDesc->tupDesc->natts * sizeof(FormData_pg_attribute)); + + for (int i = 0; i < cstate->queryDesc->tupDesc->natts; i++) + populate_compact_attribute(slot->tts_tupleDescriptor, i); + + BlessTupleDesc(slot->tts_tupleDescriptor); + } + rowdata = ExecFetchSlotHeapTupleDatum(slot); + result = makeStringInfo(); + composite_to_json(rowdata, result, false); + + CopySendData(cstate, result->data, result->len); + + CopySendTextLikeEndOfRow(cstate); +} + /* * Implementation of the start callback for binary format. Send a header * for a binary copy. @@ -402,9 +450,21 @@ SendCopyBegin(CopyToState cstate) pq_beginmessage(&buf, PqMsg_CopyOutResponse); pq_sendbyte(&buf, format); /* overall format */ - pq_sendint16(&buf, natts); - for (i = 0; i < natts; i++) - pq_sendint16(&buf, format); /* per-column formats */ + if (cstate->opts.format != COPY_FORMAT_JSON) + { + pq_sendint16(&buf, natts); + for (i = 0; i < natts; i++) + pq_sendint16(&buf, format); /* per-column formats */ + } + else + { + /* + * JSON format is always one non-binary column + */ + pq_sendint16(&buf, 1); + pq_sendint16(&buf, 0); + } + pq_endmessage(&buf); cstate->copy_dest = COPY_FRONTEND; } @@ -504,7 +564,7 @@ CopySendEndOfRow(CopyToState cstate) } /* - * Wrapper function of CopySendEndOfRow for text and CSV formats. Sends the + * Wrapper function of CopySendEndOfRow for text, CSV, and json formats. Sends the * line termination and do common appropriate things for the end of row. */ static inline void diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index c3a0a354a9c..dd23c075710 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -3557,6 +3557,10 @@ copy_opt_item: { $$ = makeDefElem("format", (Node *) makeString("csv"), @1); } + | JSON + { + $$ = makeDefElem("format", (Node *) makeString("json"), @1); + } | HEADER_P { $$ = makeDefElem("header", (Node *) makeBoolean(true), @1); @@ -3639,6 +3643,10 @@ copy_generic_opt_elem: { $$ = makeDefElem($1, $2, @1); } + | FORMAT_LA copy_generic_opt_arg + { + $$ = makeDefElem("format", $2, @1); + } ; copy_generic_opt_arg: diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index 06dd62f0008..647adafd227 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -86,8 +86,6 @@ typedef struct JsonAggState JsonUniqueBuilderState unique_check; } JsonAggState; -static void composite_to_json(Datum composite, StringInfo result, - bool use_line_feeds); static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, const Datum *vals, const bool *nulls, int *valcount, JsonTypeCategory tcategory, Oid outfuncoid, @@ -517,8 +515,9 @@ array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds) /* * Turn a composite / record into JSON. + * Exported so COPY TO can use it. */ -static void +void composite_to_json(Datum composite, StringInfo result, bool use_line_feeds) { HeapTupleHeader td; diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index 20d7a65c614..2029b1930c8 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -3377,7 +3377,7 @@ match_previous_words(int pattern_id, /* Complete COPY FROM|TO [PROGRAM] WITH (FORMAT */ else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAnyExcept("PROGRAM"), "WITH", "(", "FORMAT") || Matches("COPY|\\copy", MatchAny, "FROM|TO", "PROGRAM", MatchAny, "WITH", "(", "FORMAT")) - COMPLETE_WITH("binary", "csv", "text"); + COMPLETE_WITH("binary", "csv", "text", "json"); /* Complete COPY FROM [PROGRAM] filename WITH (ON_ERROR */ else if (Matches("COPY|\\copy", MatchAny, "FROM", MatchAnyExcept("PROGRAM"), "WITH", "(", "ON_ERROR") || diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 686653233b2..85aedc267d6 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -56,6 +56,7 @@ typedef enum CopyFormat COPY_FORMAT_TEXT = 0, COPY_FORMAT_BINARY, COPY_FORMAT_CSV, + COPY_FORMAT_JSON, } CopyFormat; /* diff --git a/src/include/utils/json.h b/src/include/utils/json.h index 49bbda7ac06..1fa8e2ce8e2 100644 --- a/src/include/utils/json.h +++ b/src/include/utils/json.h @@ -17,6 +17,8 @@ #include "lib/stringinfo.h" /* functions in json.c */ +extern void composite_to_json(Datum composite, StringInfo result, + bool use_line_feeds); extern void escape_json(StringInfo buf, const char *str); extern void escape_json_with_len(StringInfo buf, const char *str, int len); extern void escape_json_text(StringInfo buf, const text *txt); diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index 24e0f472f14..10333357d68 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -73,6 +73,82 @@ copy copytest3 to stdout csv header; c1,"col with , comma","col with "" quote" 1,a,1 2,b,2 +--- test copying in JSON mode with various styles +copy copytest to stdout json; +{"style":"DOS","test":"abc\r\ndef","filler":1} +{"style":"Unix","test":"abc\ndef","filler":2} +{"style":"Mac","test":"abc\rdef","filler":3} +{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +copy copytest to stdout (format json); +{"style":"DOS","test":"abc\r\ndef","filler":1} +{"style":"Unix","test":"abc\ndef","filler":2} +{"style":"Mac","test":"abc\rdef","filler":3} +{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +-- all of the following should yield error +copy copytest to stdout (format json, delimiter '|'); +ERROR: cannot specify DELIMITER in JSON mode +copy copytest to stdout (format json, null '\N'); +ERROR: cannot specify NULL in JSON mode +copy copytest to stdout (format json, default '|'); +ERROR: cannot specify DEFAULT in JSON mode +copy copytest to stdout (format json, header); +ERROR: cannot specify HEADER in JSON mode +copy copytest to stdout (format json, header 1); +ERROR: cannot specify HEADER in JSON mode +copy copytest to stdout (format json, quote '"'); +ERROR: COPY QUOTE requires CSV mode +copy copytest to stdout (format json, escape '"'); +ERROR: COPY ESCAPE requires CSV mode +copy copytest to stdout (format json, force_quote *); +ERROR: COPY FORCE_QUOTE requires CSV mode +copy copytest to stdout (format json, force_not_null *); +ERROR: COPY FORCE_NOT_NULL requires CSV mode +copy copytest to stdout (format json, force_null *); +ERROR: COPY FORCE_NULL requires CSV mode +copy copytest to stdout (format json, on_error ignore); +ERROR: COPY ON_ERROR cannot be used with COPY TO +LINE 1: copy copytest to stdout (format json, on_error ignore); + ^ +copy copytest from stdin(format json); +ERROR: COPY json mode cannot be used with COPY FROM +-- all of the above should yield error +-- embedded escaped characters +create temp table copyjsontest ( + id bigserial, + f1 text, + f2 timestamptz); +insert into copyjsontest + select g.i, + CASE WHEN g.i % 2 = 0 THEN + 'line with '' in it: ' || g.i::text + ELSE + 'line with " in it: ' || g.i::text + END, + 'Mon Feb 10 17:32:01 1997 PST' + from generate_series(1,5) as g(i); +insert into copyjsontest (f1) values +(E'aaa\"bbb'::text), +(E'aaa\\bbb'::text), +(E'aaa\/bbb'::text), +(E'aaa\bbbb'::text), +(E'aaa\fbbb'::text), +(E'aaa\nbbb'::text), +(E'aaa\rbbb'::text), +(E'aaa\tbbb'::text); +copy copyjsontest to stdout json; +{"id":1,"f1":"line with \" in it: 1","f2":"1997-02-10T17:32:01-08:00"} +{"id":2,"f1":"line with ' in it: 2","f2":"1997-02-10T17:32:01-08:00"} +{"id":3,"f1":"line with \" in it: 3","f2":"1997-02-10T17:32:01-08:00"} +{"id":4,"f1":"line with ' in it: 4","f2":"1997-02-10T17:32:01-08:00"} +{"id":5,"f1":"line with \" in it: 5","f2":"1997-02-10T17:32:01-08:00"} +{"id":1,"f1":"aaa\"bbb","f2":null} +{"id":2,"f1":"aaa\\bbb","f2":null} +{"id":3,"f1":"aaa/bbb","f2":null} +{"id":4,"f1":"aaa\bbbb","f2":null} +{"id":5,"f1":"aaa\fbbb","f2":null} +{"id":6,"f1":"aaa\nbbb","f2":null} +{"id":7,"f1":"aaa\rbbb","f2":null} +{"id":8,"f1":"aaa\tbbb","f2":null} create temp table copytest4 ( c1 int, "colname with tab: " text); diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index 676a8b342b5..80799e2ead9 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -82,6 +82,53 @@ this is just a line full of junk that would error out if parsed copy copytest3 to stdout csv header; +--- test copying in JSON mode with various styles +copy copytest to stdout json; +copy copytest to stdout (format json); + +-- all of the following should yield error +copy copytest to stdout (format json, delimiter '|'); +copy copytest to stdout (format json, null '\N'); +copy copytest to stdout (format json, default '|'); +copy copytest to stdout (format json, header); +copy copytest to stdout (format json, header 1); +copy copytest to stdout (format json, quote '"'); +copy copytest to stdout (format json, escape '"'); +copy copytest to stdout (format json, force_quote *); +copy copytest to stdout (format json, force_not_null *); +copy copytest to stdout (format json, force_null *); +copy copytest to stdout (format json, on_error ignore); +copy copytest from stdin(format json); +-- all of the above should yield error + +-- embedded escaped characters +create temp table copyjsontest ( + id bigserial, + f1 text, + f2 timestamptz); + +insert into copyjsontest + select g.i, + CASE WHEN g.i % 2 = 0 THEN + 'line with '' in it: ' || g.i::text + ELSE + 'line with " in it: ' || g.i::text + END, + 'Mon Feb 10 17:32:01 1997 PST' + from generate_series(1,5) as g(i); + +insert into copyjsontest (f1) values +(E'aaa\"bbb'::text), +(E'aaa\\bbb'::text), +(E'aaa\/bbb'::text), +(E'aaa\bbbb'::text), +(E'aaa\fbbb'::text), +(E'aaa\nbbb'::text), +(E'aaa\rbbb'::text), +(E'aaa\tbbb'::text); + +copy copyjsontest to stdout json; + create temp table copytest4 ( c1 int, "colname with tab: " text); -- 2.34.1