From 7dc6c1c798178f31728d048d4d528181626b3695 Mon Sep 17 00:00:00 2001 From: Zhao Junwang Date: Sat, 27 Jan 2024 13:34:38 +0800 Subject: [PATCH v8 10/10] introduce contrib/pg_copy_json Signed-off-by: Zhao Junwang --- contrib/Makefile | 1 + contrib/meson.build | 1 + contrib/pg_copy_json/.gitignore | 4 + contrib/pg_copy_json/Makefile | 23 ++ .../pg_copy_json/expected/pg_copy_json.out | 80 +++++++ contrib/pg_copy_json/meson.build | 34 +++ contrib/pg_copy_json/pg_copy_json--1.0.sql | 9 + contrib/pg_copy_json/pg_copy_json.c | 218 ++++++++++++++++++ contrib/pg_copy_json/pg_copy_json.control | 5 + contrib/pg_copy_json/sql/pg_copy_json.sql | 59 +++++ src/backend/utils/adt/json.c | 5 +- src/include/utils/json.h | 2 + 12 files changed, 438 insertions(+), 3 deletions(-) create mode 100644 contrib/pg_copy_json/.gitignore create mode 100644 contrib/pg_copy_json/Makefile create mode 100644 contrib/pg_copy_json/expected/pg_copy_json.out create mode 100644 contrib/pg_copy_json/meson.build create mode 100644 contrib/pg_copy_json/pg_copy_json--1.0.sql create mode 100644 contrib/pg_copy_json/pg_copy_json.c create mode 100644 contrib/pg_copy_json/pg_copy_json.control create mode 100644 contrib/pg_copy_json/sql/pg_copy_json.sql diff --git a/contrib/Makefile b/contrib/Makefile index da4e2316a3..82cc496aa2 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -32,6 +32,7 @@ SUBDIRS = \ pageinspect \ passwordcheck \ pg_buffercache \ + pg_copy_json \ pg_freespacemap \ pg_prewarm \ pg_stat_statements \ diff --git a/contrib/meson.build b/contrib/meson.build index c12dc906ca..38933d15d1 100644 --- a/contrib/meson.build +++ b/contrib/meson.build @@ -45,6 +45,7 @@ subdir('oid2name') subdir('pageinspect') subdir('passwordcheck') subdir('pg_buffercache') +subdir('pg_copy_json') subdir('pgcrypto') subdir('pg_freespacemap') subdir('pg_prewarm') diff --git a/contrib/pg_copy_json/.gitignore b/contrib/pg_copy_json/.gitignore new file mode 100644 index 0000000000..5dcb3ff972 --- /dev/null +++ 
b/contrib/pg_copy_json/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/contrib/pg_copy_json/Makefile b/contrib/pg_copy_json/Makefile new file mode 100644 index 0000000000..b0a348d618 --- /dev/null +++ b/contrib/pg_copy_json/Makefile @@ -0,0 +1,23 @@ +# contrib/pg_copy_json/Makefile + +MODULE_big = pg_copy_json +OBJS = \ + $(WIN32RES) \ + pg_copy_json.o +PGFILEDESC = "pg_copy_json - COPY TO JSON (JavaScript Object Notation) format" + +EXTENSION = pg_copy_json +DATA = pg_copy_json--1.0.sql + +REGRESS = pg_copy_json + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/pg_copy_json +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/pg_copy_json/expected/pg_copy_json.out b/contrib/pg_copy_json/expected/pg_copy_json.out new file mode 100644 index 0000000000..73633c2303 --- /dev/null +++ b/contrib/pg_copy_json/expected/pg_copy_json.out @@ -0,0 +1,80 @@ +-- +-- COPY TO JSON +-- +CREATE EXTENSION pg_copy_json; +-- test copying in JSON format with various styles +-- of embedded line ending characters +create temp table copytest ( + style text, + test text, + filler int); +insert into copytest values('DOS',E'abc\r\ndef',1); +insert into copytest values('Unix',E'abc\ndef',2); +insert into copytest values('Mac',E'abc\rdef',3); +insert into copytest values(E'esc\\ape',E'a\\r\\\r\\\n\\nb',4); +copy copytest to stdout with (format 'json'); +{"style":"DOS","test":"abc\r\ndef","filler":1} +{"style":"Unix","test":"abc\ndef","filler":2} +{"style":"Mac","test":"abc\rdef","filler":3} +{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +-- pg_copy_json do not support COPY FROM +copy copytest from stdout with (format 'json'); +ERROR: cannot use JSON mode in COPY FROM +-- test copying in JSON format with various styles +-- of embedded escaped characters +create temp 
table copyjsontest ( + id bigserial, + f1 text, + f2 timestamptz); +insert into copyjsontest + select g.i, + CASE WHEN g.i % 2 = 0 THEN + 'line with '' in it: ' || g.i::text + ELSE + 'line with " in it: ' || g.i::text + END, + 'Mon Feb 10 17:32:01 1997 PST' + from generate_series(1,5) as g(i); +insert into copyjsontest (f1) values +(E'aaa\"bbb'::text), +(E'aaa\\bbb'::text), +(E'aaa\/bbb'::text), +(E'aaa\bbbb'::text), +(E'aaa\fbbb'::text), +(E'aaa\nbbb'::text), +(E'aaa\rbbb'::text), +(E'aaa\tbbb'::text); +copy copyjsontest to stdout with (format 'json'); +{"id":1,"f1":"line with \" in it: 1","f2":"1997-02-10T17:32:01-08:00"} +{"id":2,"f1":"line with ' in it: 2","f2":"1997-02-10T17:32:01-08:00"} +{"id":3,"f1":"line with \" in it: 3","f2":"1997-02-10T17:32:01-08:00"} +{"id":4,"f1":"line with ' in it: 4","f2":"1997-02-10T17:32:01-08:00"} +{"id":5,"f1":"line with \" in it: 5","f2":"1997-02-10T17:32:01-08:00"} +{"id":1,"f1":"aaa\"bbb","f2":null} +{"id":2,"f1":"aaa\\bbb","f2":null} +{"id":3,"f1":"aaa/bbb","f2":null} +{"id":4,"f1":"aaa\bbbb","f2":null} +{"id":5,"f1":"aaa\fbbb","f2":null} +{"id":6,"f1":"aaa\nbbb","f2":null} +{"id":7,"f1":"aaa\rbbb","f2":null} +{"id":8,"f1":"aaa\tbbb","f2":null} +-- test force array +copy copytest to stdout (format 'json', force_array); +[ + {"style":"DOS","test":"abc\r\ndef","filler":1} +,{"style":"Unix","test":"abc\ndef","filler":2} +,{"style":"Mac","test":"abc\rdef","filler":3} +,{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +] +copy copytest to stdout (format 'json', force_array true); +[ + {"style":"DOS","test":"abc\r\ndef","filler":1} +,{"style":"Unix","test":"abc\ndef","filler":2} +,{"style":"Mac","test":"abc\rdef","filler":3} +,{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} +] +copy copytest to stdout (format 'json', force_array false); +{"style":"DOS","test":"abc\r\ndef","filler":1} +{"style":"Unix","test":"abc\ndef","filler":2} +{"style":"Mac","test":"abc\rdef","filler":3} 
+{"style":"esc\\ape","test":"a\\r\\\r\\\n\\nb","filler":4} diff --git a/contrib/pg_copy_json/meson.build b/contrib/pg_copy_json/meson.build new file mode 100644 index 0000000000..71f9338267 --- /dev/null +++ b/contrib/pg_copy_json/meson.build @@ -0,0 +1,34 @@ +# Copyright (c) 2024, PostgreSQL Global Development Group + +pg_copy_json_sources = files( + 'pg_copy_json.c', +) + +if host_system == 'windows' + pg_copy_json_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'pg_copy_json', + '--FILEDESC', 'pg_copy_json - COPY TO JSON format',]) +endif + +pg_copy_json = shared_module('pg_copy_json', + pg_copy_json_sources, + kwargs: contrib_mod_args, +) +contrib_targets += pg_copy_json + +install_data( + 'pg_copy_json--1.0.sql', + 'pg_copy_json.control', + kwargs: contrib_data_args, +) + +tests += { + 'name': 'pg_copy_json', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'pg_copy_json', + ], + }, +} diff --git a/contrib/pg_copy_json/pg_copy_json--1.0.sql b/contrib/pg_copy_json/pg_copy_json--1.0.sql new file mode 100644 index 0000000000..d738a1e7e9 --- /dev/null +++ b/contrib/pg_copy_json/pg_copy_json--1.0.sql @@ -0,0 +1,9 @@ +/* contrib/pg_copy_json/pg_copy_json--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION pg_copy_json" to load this file. \quit + +CREATE FUNCTION pg_catalog.json(internal) + RETURNS copy_handler + AS 'MODULE_PATHNAME', 'copy_json' + LANGUAGE C; diff --git a/contrib/pg_copy_json/pg_copy_json.c b/contrib/pg_copy_json/pg_copy_json.c new file mode 100644 index 0000000000..cbfdee8e8b --- /dev/null +++ b/contrib/pg_copy_json/pg_copy_json.c @@ -0,0 +1,218 @@ +/*-------------------------------------------------------------------------- + * + * pg_copy_json.c + * COPY TO JSON (JavaScript Object Notation) format. 
+ * + * Portions Copyright (c) 2024, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/pg_copy_json/pg_copy_json.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "commands/copy.h" +#include "commands/defrem.h" +#include "funcapi.h" +#include "libpq/libpq.h" +#include "libpq/pqformat.h" +#include "utils/json.h" + +PG_MODULE_MAGIC; + +typedef struct +{ + /* + * Force output of square brackets as array decorations at the beginning + * and end of output, with commas between the rows. + */ + bool force_array; + bool force_array_specified; + + /* need delimiter to start next json array element */ + bool json_row_delim_needed; +} CopyJsonData; + +static inline void +InitCopyJsonData(CopyJsonData *p) +{ + Assert(p); + p->force_array = false; + p->force_array_specified = false; + p->json_row_delim_needed = false; +} + +static void +CopyToJsonSendEndOfRow(CopyToState cstate) +{ + switch (cstate->copy_dest) + { + case COPY_DEST_FILE: + /* Default line termination depends on platform */ +#ifndef WIN32 + CopySendChar(cstate, '\n'); +#else + CopySendString(cstate, "\r\n"); +#endif + break; + case COPY_DEST_FRONTEND: + /* The FE/BE protocol uses \n as newline for all platforms */ + CopySendChar(cstate, '\n'); + break; + default: + break; + } + CopyToStateFlush(cstate); +} + +static bool +CopyToJsonProcessOption(CopyToState cstate, DefElem *defel) +{ + CopyJsonData *p; + + if (cstate->opaque == NULL) + { + MemoryContext oldcontext; + oldcontext = MemoryContextSwitchTo(cstate->copycontext); + cstate->opaque = palloc0(sizeof(CopyJsonData)); + MemoryContextSwitchTo(oldcontext); + InitCopyJsonData(cstate->opaque); + } + + p = (CopyJsonData *)cstate->opaque; + + if (strcmp(defel->defname, "force_array") == 0) + { + if (p->force_array_specified) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("CopyToJsonProcessOption: redundant options \"%s\"=\"%s\"", + defel->defname, 
defGetString(defel))); + p->force_array_specified = true; + p->force_array = defGetBoolean(defel); + + return true; + } + + return false; +} + +static void +CopyToJsonSendCopyBegin(CopyToState cstate) +{ + StringInfoData buf; + int16 format = 0; + + pq_beginmessage(&buf, PqMsg_CopyOutResponse); + pq_sendbyte(&buf, format); /* overall format */ + /* + * JSON mode is always one non-binary column + */ + pq_sendint16(&buf, 1); + pq_sendint16(&buf, 0); + pq_endmessage(&buf); +} + +static void +CopyToJsonStart(CopyToState cstate, TupleDesc tupDesc) +{ + CopyJsonData *p; + + if (cstate->opaque == NULL) + { + MemoryContext oldcontext; + oldcontext = MemoryContextSwitchTo(cstate->copycontext); + cstate->opaque = palloc0(sizeof(CopyJsonData)); + MemoryContextSwitchTo(oldcontext); + InitCopyJsonData(cstate->opaque); + } + + /* No need to alloc cstate->out_functions */ + + p = (CopyJsonData *)cstate->opaque; + + /* If FORCE_ARRAY has been specified send the open bracket. */ + if (p->force_array) + { + CopySendChar(cstate, '['); + CopyToJsonSendEndOfRow(cstate); + } +} + +static void +CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot) +{ + Datum rowdata; + StringInfo result; + CopyJsonData *p; + + Assert(cstate->opaque); + p = (CopyJsonData *)cstate->opaque; + + if(!cstate->rel) + { + for (int i = 0; i < slot->tts_tupleDescriptor->natts; i++) + { + /* Flat-copy the attribute array */ + memcpy(TupleDescAttr(slot->tts_tupleDescriptor, i), + TupleDescAttr(cstate->queryDesc->tupDesc, i), + 1 * sizeof(FormData_pg_attribute)); + } + BlessTupleDesc(slot->tts_tupleDescriptor); + } + rowdata = ExecFetchSlotHeapTupleDatum(slot); + result = makeStringInfo(); + composite_to_json(rowdata, result, false); + + if (p->json_row_delim_needed) + CopySendChar(cstate, ','); + else if (p->force_array) + { + /* first row needs no delimiter */ + CopySendChar(cstate, ' '); + p->json_row_delim_needed = true; + } + CopySendData(cstate, result->data, result->len); + CopyToJsonSendEndOfRow(cstate); 
+} + +static void +CopyToJsonEnd(CopyToState cstate) +{ + CopyJsonData *p; + + Assert(cstate->opaque); + p = (CopyJsonData *)cstate->opaque; + + /* If FORCE_ARRAY has been specified send the close bracket. */ + if (p->force_array) + { + CopySendChar(cstate, ']'); + CopyToJsonSendEndOfRow(cstate); + } +} + +static const CopyToRoutine CopyToRoutineJson = { + .type = T_CopyToRoutine, + .CopyToProcessOption = CopyToJsonProcessOption, + .CopyToSendCopyBegin = CopyToJsonSendCopyBegin, + .CopyToStart = CopyToJsonStart, + .CopyToOneRow = CopyToJsonOneRow, + .CopyToEnd = CopyToJsonEnd, +}; + +PG_FUNCTION_INFO_V1(copy_json); +Datum +copy_json(PG_FUNCTION_ARGS) +{ + bool is_from = PG_GETARG_BOOL(0); + + if (is_from) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use JSON mode in COPY FROM"))); + + PG_RETURN_POINTER(&CopyToRoutineJson); +} diff --git a/contrib/pg_copy_json/pg_copy_json.control b/contrib/pg_copy_json/pg_copy_json.control new file mode 100644 index 0000000000..90b0a74603 --- /dev/null +++ b/contrib/pg_copy_json/pg_copy_json.control @@ -0,0 +1,5 @@ +# pg_copy_json extension +comment = 'COPY TO JSON format' +default_version = '1.0' +module_pathname = '$libdir/pg_copy_json' +relocatable = true diff --git a/contrib/pg_copy_json/sql/pg_copy_json.sql b/contrib/pg_copy_json/sql/pg_copy_json.sql new file mode 100644 index 0000000000..73e7e514ac --- /dev/null +++ b/contrib/pg_copy_json/sql/pg_copy_json.sql @@ -0,0 +1,59 @@ +-- +-- COPY TO JSON +-- + +CREATE EXTENSION pg_copy_json; + +-- test copying in JSON format with various styles +-- of embedded line ending characters + +create temp table copytest ( + style text, + test text, + filler int); + +insert into copytest values('DOS',E'abc\r\ndef',1); +insert into copytest values('Unix',E'abc\ndef',2); +insert into copytest values('Mac',E'abc\rdef',3); +insert into copytest values(E'esc\\ape',E'a\\r\\\r\\\n\\nb',4); + +copy copytest to stdout with (format 'json'); + +-- pg_copy_json do not 
support COPY FROM +copy copytest from stdout with (format 'json'); + +-- test copying in JSON format with various styles +-- of embedded escaped characters + +create temp table copyjsontest ( + id bigserial, + f1 text, + f2 timestamptz); + +insert into copyjsontest + select g.i, + CASE WHEN g.i % 2 = 0 THEN + 'line with '' in it: ' || g.i::text + ELSE + 'line with " in it: ' || g.i::text + END, + 'Mon Feb 10 17:32:01 1997 PST' + from generate_series(1,5) as g(i); + +insert into copyjsontest (f1) values +(E'aaa\"bbb'::text), +(E'aaa\\bbb'::text), +(E'aaa\/bbb'::text), +(E'aaa\bbbb'::text), +(E'aaa\fbbb'::text), +(E'aaa\nbbb'::text), +(E'aaa\rbbb'::text), +(E'aaa\tbbb'::text); + +copy copyjsontest to stdout with (format 'json'); + +-- test force array + +copy copytest to stdout (format 'json', force_array); +copy copytest to stdout (format 'json', force_array true); +copy copytest to stdout (format 'json', force_array false); diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index d719a61f16..fabd4e611e 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -83,8 +83,6 @@ typedef struct JsonAggState JsonUniqueBuilderState unique_check; } JsonAggState; -static void composite_to_json(Datum composite, StringInfo result, - bool use_line_feeds); static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals, bool *nulls, int *valcount, JsonTypeCategory tcategory, Oid outfuncoid, @@ -507,8 +505,9 @@ array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds) /* * Turn a composite / record into JSON. + * Exported so COPY TO can use it. 
*/ -static void +void composite_to_json(Datum composite, StringInfo result, bool use_line_feeds) { HeapTupleHeader td; diff --git a/src/include/utils/json.h b/src/include/utils/json.h index 6d7f1b387d..d5631171ad 100644 --- a/src/include/utils/json.h +++ b/src/include/utils/json.h @@ -17,6 +17,8 @@ #include "lib/stringinfo.h" /* functions in json.c */ +extern void composite_to_json(Datum composite, StringInfo result, + bool use_line_feeds); extern void escape_json(StringInfo buf, const char *str); extern char *JsonEncodeDateTime(char *buf, Datum value, Oid typid, const int *tzp); -- 2.41.0