From b3d3d6023f96aa7971a0663d8c0bd6de50e877a5 Mon Sep 17 00:00:00 2001 From: jian he Date: Mon, 19 Feb 2024 10:37:18 +0800 Subject: [PATCH v9 1/2] Add another COPY fomrat: json this format is only allowed in COPY TO operation. discussion: https://postgr.es/m/CALvfUkBxTYy5uWPFVwpk_7ii2zgT07t3d-yR_cy4sfrrLU%3Dkcg%40mail.gmail.com discussion: https://postgr.es/m/6a04628d-0d53-41d9-9e35-5a8dc302c34c@joeconway.com --- doc/src/sgml/ref/copy.sgml | 5 ++ src/backend/commands/copy.c | 13 +++ src/backend/commands/copyto.c | 125 ++++++++++++++++++++--------- src/backend/parser/gram.y | 8 ++ src/backend/utils/adt/json.c | 5 +- src/include/commands/copy.h | 1 + src/include/utils/json.h | 2 + src/test/regress/expected/copy.out | 54 +++++++++++++ src/test/regress/sql/copy.sql | 38 +++++++++ 9 files changed, 208 insertions(+), 43 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 55764fc1..ef9e4729 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -214,9 +214,14 @@ COPY { table_name [ ( text, csv (Comma Separated Values), + json (JavaScript Object Notation), or binary. The default is text. + + The json option is allowed only in + COPY TO. + diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index cc0786c6..5d5b733d 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -480,6 +480,8 @@ ProcessCopyOptions(ParseState *pstate, /* default format */ ; else if (strcmp(fmt, "csv") == 0) opts_out->csv_mode = true; + else if (strcmp(fmt, "json") == 0) + opts_out->json_mode = true; else if (strcmp(fmt, "binary") == 0) opts_out->binary = true; else @@ -716,6 +718,11 @@ ProcessCopyOptions(ParseState *pstate, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot specify HEADER in BINARY mode"))); + if (opts_out->json_mode && opts_out->header_line) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot specify HEADER in JSON mode"))); + /* Check quote */ if (!opts_out->csv_mode && opts_out->quote != NULL) ereport(ERROR, @@ -793,6 +800,12 @@ ProcessCopyOptions(ParseState *pstate, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("COPY FREEZE cannot be used with COPY TO"))); + /* Check json format */ + if (opts_out->json_mode && is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use JSON mode in COPY FROM"))); + if (opts_out->default_print) { if (!is_from) diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index 20ffc903..c948a431 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -28,6 +28,7 @@ #include "executor/execdesc.h" #include "executor/executor.h" #include "executor/tuptable.h" +#include "funcapi.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" #include "mb/pg_wchar.h" @@ -37,6 +38,7 @@ #include "rewrite/rewriteHandler.h" #include "storage/fd.h" #include "tcop/tcopprot.h" +#include "utils/json.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/partcache.h" @@ -86,6 +88,7 @@ typedef struct CopyToStateData List *attnumlist; /* integer list of attnums to copy */ char *filename; /* filename, or NULL for STDOUT */ bool is_program; /* is 'filename' a program to popen? */ + bool json_row_delim_needed; /* need delimiter to start next json array element */ copy_data_dest_cb data_dest_cb; /* function for writing data */ CopyFormatOptions opts; @@ -146,9 +149,20 @@ SendCopyBegin(CopyToState cstate) pq_beginmessage(&buf, PqMsg_CopyOutResponse); pq_sendbyte(&buf, format); /* overall format */ - pq_sendint16(&buf, natts); - for (i = 0; i < natts; i++) - pq_sendint16(&buf, format); /* per-column formats */ + if (!cstate->opts.json_mode) + { + pq_sendint16(&buf, natts); + for (i = 0; i < natts; i++) + pq_sendint16(&buf, format); /* per-column formats */ + } + else + { + /* + * JSON mode is always one non-binary column + */ + pq_sendint16(&buf, 1); + pq_sendint16(&buf, 0); + } pq_endmessage(&buf); cstate->copy_dest = COPY_FRONTEND; } @@ -907,11 +921,7 @@ DoCopyTo(CopyToState cstate) static void CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot) { - bool need_delim = false; - FmgrInfo *out_functions = cstate->out_functions; MemoryContext oldcontext; - ListCell *cur; - char *string; MemoryContextReset(cstate->rowcontext); oldcontext = MemoryContextSwitchTo(cstate->rowcontext); @@ -922,53 +932,88 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot) CopySendInt16(cstate, list_length(cstate->attnumlist)); } - /* Make sure the tuple is fully deconstructed */ - slot_getallattrs(slot); - - foreach(cur, cstate->attnumlist) + if (!cstate->opts.json_mode) { - int attnum = lfirst_int(cur); - Datum value = slot->tts_values[attnum - 1]; - bool isnull = slot->tts_isnull[attnum - 1]; + bool need_delim = false; + FmgrInfo *out_functions = cstate->out_functions; + ListCell *cur; + char *string; - if (!cstate->opts.binary) - { - if (need_delim) - CopySendChar(cstate, cstate->opts.delim[0]); - need_delim = true; - } + /* Make sure the tuple is fully deconstructed */ + slot_getallattrs(slot); - if (isnull) - { - if (!cstate->opts.binary) - CopySendString(cstate, cstate->opts.null_print_client); - else - CopySendInt32(cstate, -1); - } - else + foreach(cur, cstate->attnumlist) { + int attnum = lfirst_int(cur); + Datum value = slot->tts_values[attnum - 1]; + bool isnull = slot->tts_isnull[attnum - 1]; + if (!cstate->opts.binary) { - string = OutputFunctionCall(&out_functions[attnum - 1], - value); - if (cstate->opts.csv_mode) - CopyAttributeOutCSV(cstate, string, - cstate->opts.force_quote_flags[attnum - 1]); + if (need_delim) + CopySendChar(cstate, cstate->opts.delim[0]); + need_delim = true; + } + + if (isnull) + { + if (!cstate->opts.binary) + CopySendString(cstate, cstate->opts.null_print_client); else - CopyAttributeOutText(cstate, string); + CopySendInt32(cstate, -1); } else { - bytea *outputbytes; + if (!cstate->opts.binary) + { + string = OutputFunctionCall(&out_functions[attnum - 1], + value); + if (cstate->opts.csv_mode) + CopyAttributeOutCSV(cstate, string, + cstate->opts.force_quote_flags[attnum - 1]); + else + CopyAttributeOutText(cstate, string); + } + else + { + bytea *outputbytes; - outputbytes = SendFunctionCall(&out_functions[attnum - 1], - value); - CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ); - CopySendData(cstate, VARDATA(outputbytes), - VARSIZE(outputbytes) - VARHDRSZ); + outputbytes = SendFunctionCall(&out_functions[attnum - 1], + value); + CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ); + CopySendData(cstate, VARDATA(outputbytes), + VARSIZE(outputbytes) - VARHDRSZ); + } } } } + else + { + Datum rowdata; + StringInfo result; + + /* + * iff COPY TO command's source data is from a query, not a relation, + * then we need to copy the TupleDesc from the cstate->queryDesc. + * because query execution returning slot's TupleDesc may change, + * composite_to_json requires correct TupleDesc. + */ + if(!cstate->rel) + { + for (int i = 0; i < slot->tts_tupleDescriptor->natts; i++) + { + memcpy(TupleDescAttr(slot->tts_tupleDescriptor, i), + TupleDescAttr(cstate->queryDesc->tupDesc, i), + 1 * sizeof(FormData_pg_attribute)); + } + BlessTupleDesc(slot->tts_tupleDescriptor); + } + rowdata = ExecFetchSlotHeapTupleDatum(slot); + result = makeStringInfo(); + composite_to_json(rowdata, result, false); + + CopySendData(cstate, result->data, result->len); + } CopySendEndOfRow(cstate); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 60b31d9f..ada49e4c 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -3424,6 +3424,10 @@ copy_opt_item: { $$ = makeDefElem("format", (Node *) makeString("csv"), @1); } + | JSON + { + $$ = makeDefElem("format", (Node *) makeString("json"), @1); + } | HEADER_P { $$ = makeDefElem("header", (Node *) makeBoolean(true), @1); @@ -3506,6 +3510,10 @@ copy_generic_opt_elem: { $$ = makeDefElem($1, $2, @1); } + | FORMAT_LA copy_generic_opt_arg + { + $$ = makeDefElem("format", $2, @1); + } ; copy_generic_opt_arg: diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index d719a61f..fabd4e61 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -83,8 +83,6 @@ typedef struct JsonAggState JsonUniqueBuilderState unique_check; } JsonAggState; -static void composite_to_json(Datum composite, StringInfo result, - bool use_line_feeds); static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals, bool *nulls, int *valcount, JsonTypeCategory tcategory, Oid outfuncoid, @@ -507,8 +505,9 @@ array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds) /* * Turn a composite / record into JSON. + * Exported so COPY TO can use it. */ -static void +void composite_to_json(Datum composite, StringInfo result, bool use_line_feeds) { HeapTupleHeader td; diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index b3da3cb0..f591b613 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -53,6 +53,7 @@ typedef struct CopyFormatOptions bool binary; /* binary format? */ bool freeze; /* freeze rows on loading? */ bool csv_mode; /* Comma Separated Value format? */ + bool json_mode; /* JSON format? */ CopyHeaderChoice header_line; /* header line? */ char *null_print; /* NULL marker string (server encoding!) */ int null_print_len; /* length of same */ diff --git a/src/include/utils/json.h b/src/include/utils/json.h index 6d7f1b38..d5631171 100644 --- a/src/include/utils/json.h +++ b/src/include/utils/json.h @@ -17,6 +17,8 @@ #include "lib/stringinfo.h" /* functions in json.c */ +extern void composite_to_json(Datum composite, StringInfo result, + bool use_line_feeds); extern void escape_json(StringInfo buf, const char *str); extern char *JsonEncodeDateTime(char *buf, Datum value, Oid typid, const int *tzp); diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index b48365ec..0c5ade47 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -42,6 +42,60 @@ copy copytest3 to stdout csv header; c1,"col with , comma","col with "" quote" 1,a,1 2,b,2 +--- test copying in JSON mode with various styles +copy copytest to stdout json; +{"style":"DOS","test":"abc\r\ndef","filler":1} +{"style":"Unix","test":"abc\ndef","filler":2} +{"style":"Mac","test":"abc\rdef","filler":3} +{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +copy copytest to stdout (format json); +{"style":"DOS","test":"abc\r\ndef","filler":1} +{"style":"Unix","test":"abc\ndef","filler":2} +{"style":"Mac","test":"abc\rdef","filler":3} +{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +-- Error +copy copytest to stdout (format json, header); +ERROR: cannot specify HEADER in JSON mode +-- Error +copy copytest from stdout (format json); +ERROR: cannot use JSON mode in COPY FROM +-- embedded escaped characters +create temp table copyjsontest ( + id bigserial, + f1 text, + f2 timestamptz); +insert into copyjsontest + select g.i, + CASE WHEN g.i % 2 = 0 THEN + 'line with '' in it: ' || g.i::text + ELSE + 'line with " in it: ' || g.i::text + END, + 'Mon Feb 10 17:32:01 1997 PST' + from generate_series(1,5) as g(i); +insert into copyjsontest (f1) values +(E'aaa\"bbb'::text), +(E'aaa\\bbb'::text), +(E'aaa\/bbb'::text), +(E'aaa\bbbb'::text), +(E'aaa\fbbb'::text), +(E'aaa\nbbb'::text), +(E'aaa\rbbb'::text), +(E'aaa\tbbb'::text); +copy copyjsontest to stdout json; +{"id":1,"f1":"line with \" in it: 1","f2":"1997-02-10T17:32:01-08:00"} +{"id":2,"f1":"line with ' in it: 2","f2":"1997-02-10T17:32:01-08:00"} +{"id":3,"f1":"line with \" in it: 3","f2":"1997-02-10T17:32:01-08:00"} +{"id":4,"f1":"line with ' in it: 4","f2":"1997-02-10T17:32:01-08:00"} +{"id":5,"f1":"line with \" in it: 5","f2":"1997-02-10T17:32:01-08:00"} +{"id":1,"f1":"aaa\"bbb","f2":null} +{"id":2,"f1":"aaa\\bbb","f2":null} +{"id":3,"f1":"aaa/bbb","f2":null} +{"id":4,"f1":"aaa\bbbb","f2":null} +{"id":5,"f1":"aaa\fbbb","f2":null} +{"id":6,"f1":"aaa\nbbb","f2":null} +{"id":7,"f1":"aaa\rbbb","f2":null} +{"id":8,"f1":"aaa\tbbb","f2":null} create temp table copytest4 ( c1 int, "colname with tab: " text); diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index 43d2e906..da6b0b0a 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -54,6 +54,44 @@ this is just a line full of junk that would error out if parsed copy copytest3 to stdout csv header; +--- test copying in JSON mode with various styles +copy copytest to stdout json; + +copy copytest to stdout (format json); + +-- Error +copy copytest to stdout (format json, header); +-- Error +copy copytest from stdout (format json); + +-- embedded escaped characters +create temp table copyjsontest ( + id bigserial, + f1 text, + f2 timestamptz); + +insert into copyjsontest + select g.i, + CASE WHEN g.i % 2 = 0 THEN + 'line with '' in it: ' || g.i::text + ELSE + 'line with " in it: ' || g.i::text + END, + 'Mon Feb 10 17:32:01 1997 PST' + from generate_series(1,5) as g(i); + +insert into copyjsontest (f1) values +(E'aaa\"bbb'::text), +(E'aaa\\bbb'::text), +(E'aaa\/bbb'::text), +(E'aaa\bbbb'::text), +(E'aaa\fbbb'::text), +(E'aaa\nbbb'::text), +(E'aaa\rbbb'::text), +(E'aaa\tbbb'::text); + +copy copyjsontest to stdout json; + create temp table copytest4 ( c1 int, "colname with tab: " text); -- 2.34.1