Re: Make COPY format extendable: Extract COPY TO format implementations - Mailing list pgsql-hackers
| From | Sutou Kouhei |
|---|---|
| Subject | Re: Make COPY format extendable: Extract COPY TO format implementations |
| Date | |
| Msg-id | 20251014.111524.966856440891708928.kou@clear-code.com Whole thread Raw |
| In response to | Re: Make COPY format extendable: Extract COPY TO format implementations (Masahiko Sawada <sawada.mshk@gmail.com>) |
| Responses |
Re: Make COPY format extendable: Extract COPY TO format implementations
|
| List | pgsql-hackers |
Hi,
In <CAD21AoBkA=g=PN17r_iieru+vLyLtGZ8WvohgANa2vzsMfMogQ@mail.gmail.com>
"Re: Make COPY format extendable: Extract COPY TO format implementations" on Mon, 13 Oct 2025 14:40:31 -0700,
Masahiko Sawada <sawada.mshk@gmail.com> wrote:
> The patch refactors the CopyToStateData so that we can both hide
> internal-use-only fields from extensions and extension can use its own
> state data, while not adding extra indirection layers. TBH I'm really
> not sure we must fully hide internal fields from extensions. Other
> extendable components seem not to strictly hide internal information
> from extensions. I'd suggest starting with only the latter point. That
> is, we merge fields in CopyToStateInternalData to CopyToStateData.
> What do you think?
OK. Let's follow the existing style. How about the attached
patch? It merges CopyToStateInternalData to CopyToStateData.
Thanks,
--
kou
From 325f56d4b4372f7b90b88c9c9068d253fcc9f39a Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Tue, 14 Oct 2025 11:08:23 +0900
Subject: [PATCH] Split CopyToStateData to CopyToState{,Builtin}Data
---
src/backend/commands/copyto.c | 170 ++++++++++++++++-----------------
src/include/commands/copyapi.h | 66 +++++++++++++
2 files changed, 148 insertions(+), 88 deletions(-)
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index e5781155cdf..176d98f866b 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -21,7 +21,6 @@
#include "access/tableam.h"
#include "commands/copyapi.h"
#include "commands/progress.h"
-#include "executor/execdesc.h"
#include "executor/executor.h"
#include "executor/tuptable.h"
#include "libpq/libpq.h"
@@ -37,19 +36,7 @@
#include "utils/snapmgr.h"
/*
- * Represents the different dest cases we need to worry about at
- * the bottom level
- */
-typedef enum CopyDest
-{
- COPY_FILE, /* to file (or a piped program) */
- COPY_FRONTEND, /* to frontend */
- COPY_CALLBACK, /* to callback function */
-} CopyDest;
-
-/*
- * This struct contains all the state variables used throughout a COPY TO
- * operation.
+ * This struct contains the state variables used by built-in format routines.
*
* Multi-byte encodings: all supported client-side encodings encode multi-byte
* characters by having the first byte's high bit set. Subsequent bytes of the
@@ -62,40 +49,16 @@ typedef enum CopyDest
* invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
* when we have to do it the hard way.
*/
-typedef struct CopyToStateData
+typedef struct CopyToStateBuiltinData
{
- /* format-specific routines */
- const CopyToRoutine *routine;
+ CopyToStateData parent;
/* low-level state data */
- CopyDest copy_dest; /* type of copy source/destination */
- FILE *copy_file; /* used if copy_dest == COPY_FILE */
- StringInfo fe_msgbuf; /* used for all dests during COPY TO */
-
int file_encoding; /* file or remote side's character encoding */
bool need_transcoding; /* file encoding diff from server? */
bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
-
- /* parameters from the COPY command */
- Relation rel; /* relation to copy to */
- QueryDesc *queryDesc; /* executable query to copy from */
- List *attnumlist; /* integer list of attnums to copy */
- char *filename; /* filename, or NULL for STDOUT */
- bool is_program; /* is 'filename' a program to popen? */
- copy_data_dest_cb data_dest_cb; /* function for writing data */
-
- CopyFormatOptions opts;
- Node *whereClause; /* WHERE condition (or NULL) */
-
- /*
- * Working state
- */
- MemoryContext copycontext; /* per-copy execution context */
-
- FmgrInfo *out_functions; /* lookup info for output functions */
- MemoryContext rowcontext; /* per-row evaluation context */
- uint64 bytes_processed; /* number of bytes processed so far */
-} CopyToStateData;
+} CopyToStateBuiltinData;
+typedef struct CopyToStateBuiltinData *CopyToStateBuiltin;
/* DestReceiver for COPY (query) TO */
typedef struct
@@ -118,6 +81,7 @@ static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
bool use_quote);
/* built-in format-specific routines */
+static size_t CopyToBuiltinGetStateSize(void);
static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc);
static void CopyToTextLikeOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo);
static void CopyToTextOneRow(CopyToState cstate, TupleTableSlot *slot);
@@ -150,6 +114,7 @@ static void CopySendInt16(CopyToState cstate, int16 val);
/* text format */
static const CopyToRoutine CopyToRoutineText = {
+ .CopyToGetStateSize = CopyToBuiltinGetStateSize,
.CopyToStart = CopyToTextLikeStart,
.CopyToOutFunc = CopyToTextLikeOutFunc,
.CopyToOneRow = CopyToTextOneRow,
@@ -158,6 +123,7 @@ static const CopyToRoutine CopyToRoutineText = {
/* CSV format */
static const CopyToRoutine CopyToRoutineCSV = {
+ .CopyToGetStateSize = CopyToBuiltinGetStateSize,
.CopyToStart = CopyToTextLikeStart,
.CopyToOutFunc = CopyToTextLikeOutFunc,
.CopyToOneRow = CopyToCSVOneRow,
@@ -166,6 +132,7 @@ static const CopyToRoutine CopyToRoutineCSV = {
/* binary format */
static const CopyToRoutine CopyToRoutineBinary = {
+ .CopyToGetStateSize = CopyToBuiltinGetStateSize,
.CopyToStart = CopyToBinaryStart,
.CopyToOutFunc = CopyToBinaryOutFunc,
.CopyToOneRow = CopyToBinaryOneRow,
@@ -185,18 +152,27 @@ CopyToGetRoutine(const CopyFormatOptions *opts)
return &CopyToRoutineText;
}
+/* Implementation of the allocate callback for all built-in formats */
+static size_t
+CopyToBuiltinGetStateSize(void)
+{
+ return sizeof(CopyToStateBuiltinData);
+}
+
/* Implementation of the start callback for text and CSV formats */
static void
CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
{
+ CopyToStateBuiltin cstate_builtin = (CopyToStateBuiltin) cstate;
+
/*
* For non-binary copy, we need to convert null_print to file encoding,
* because it will be sent directly with CopySendString.
*/
- if (cstate->need_transcoding)
+ if (cstate_builtin->need_transcoding)
cstate->opts.null_print_client = pg_server_to_any(cstate->opts.null_print,
cstate->opts.null_print_len,
- cstate->file_encoding);
+ cstate_builtin->file_encoding);
/* if a header has been requested send the line */
if (cstate->opts.header_line == COPY_HEADER_TRUE)
@@ -401,7 +377,7 @@ SendCopyBegin(CopyToState cstate)
for (i = 0; i < natts; i++)
pq_sendint16(&buf, format); /* per-column formats */
pq_endmessage(&buf);
- cstate->copy_dest = COPY_FRONTEND;
+ cstate->copy_dest = COPY_DEST_FRONTEND;
}
static void
@@ -448,7 +424,7 @@ CopySendEndOfRow(CopyToState cstate)
switch (cstate->copy_dest)
{
- case COPY_FILE:
+ case COPY_DEST_FILE:
if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
cstate->copy_file) != 1 ||
ferror(cstate->copy_file))
@@ -482,11 +458,11 @@ CopySendEndOfRow(CopyToState cstate)
errmsg("could not write to COPY file: %m")));
}
break;
- case COPY_FRONTEND:
+ case COPY_DEST_FRONTEND:
/* Dump the accumulated row as one CopyData message */
(void) pq_putmessage(PqMsg_CopyData, fe_msgbuf->data, fe_msgbuf->len);
break;
- case COPY_CALLBACK:
+ case COPY_DEST_CALLBACK:
cstate->data_dest_cb(fe_msgbuf->data, fe_msgbuf->len);
break;
}
@@ -507,7 +483,7 @@ CopySendTextLikeEndOfRow(CopyToState cstate)
{
switch (cstate->copy_dest)
{
- case COPY_FILE:
+ case COPY_DEST_FILE:
/* Default line termination depends on platform */
#ifndef WIN32
CopySendChar(cstate, '\n');
@@ -515,7 +491,7 @@ CopySendTextLikeEndOfRow(CopyToState cstate)
CopySendString(cstate, "\r\n");
#endif
break;
- case COPY_FRONTEND:
+ case COPY_DEST_FRONTEND:
/* The FE/BE protocol uses \n as newline for all platforms */
CopySendChar(cstate, '\n');
break;
@@ -631,10 +607,14 @@ BeginCopyTo(ParseState *pstate,
List *options)
{
CopyToState cstate;
+ CopyToStateBuiltin cstate_builtin;
bool pipe = (filename == NULL && data_dest_cb == NULL);
TupleDesc tupDesc;
int num_phys_attrs;
+ MemoryContext copycontext;
MemoryContext oldcontext;
+ CopyFormatOptions opts = {0};
+ const CopyToRoutine *routine;
const int progress_cols[] = {
PROGRESS_COPY_COMMAND,
PROGRESS_COPY_TYPE
@@ -686,24 +666,33 @@ BeginCopyTo(ParseState *pstate,
}
- /* Allocate workspace and zero all fields */
- cstate = (CopyToStateData *) palloc0(sizeof(CopyToStateData));
-
/*
* We allocate everything used by a cstate in a new memory context. This
* avoids memory leaks during repeated use of COPY in a query.
*/
- cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
- "COPY",
- ALLOCSET_DEFAULT_SIZES);
+ copycontext = AllocSetContextCreate(CurrentMemoryContext,
+ "COPY",
+ ALLOCSET_DEFAULT_SIZES);
- oldcontext = MemoryContextSwitchTo(cstate->copycontext);
+ oldcontext = MemoryContextSwitchTo(copycontext);
/* Extract options from the statement node tree */
- ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options);
+ ProcessCopyOptions(pstate, &opts, false /* is_from */ , options);
- /* Set format routine */
- cstate->routine = CopyToGetRoutine(&cstate->opts);
+ /* Get format routine */
+ routine = CopyToGetRoutine(&opts);
+
+ /* Allocate workspace and set known values */
+ MemoryContextSwitchTo(oldcontext);
+ cstate = (CopyToState) palloc0(routine->CopyToGetStateSize());
+ MemoryContextSwitchTo(copycontext);
+ if (routine == &CopyToRoutineText || routine == &CopyToRoutineCSV || routine == &CopyToRoutineBinary)
+ cstate_builtin = (CopyToStateBuiltin) cstate;
+ else
+ cstate_builtin = NULL;
+ cstate->copycontext = copycontext;
+ cstate->opts = opts;
+ cstate->routine = routine;
/* Process the source/target relation or query */
if (rel)
@@ -883,31 +872,34 @@ BeginCopyTo(ParseState *pstate,
}
}
- /* Use client encoding when ENCODING option is not specified. */
- if (cstate->opts.file_encoding < 0)
- cstate->file_encoding = pg_get_client_encoding();
- else
- cstate->file_encoding = cstate->opts.file_encoding;
+ if (cstate_builtin)
+ {
+ /* Use client encoding when ENCODING option is not specified. */
+ if (cstate->opts.file_encoding < 0)
+ cstate_builtin->file_encoding = pg_get_client_encoding();
+ else
+ cstate_builtin->file_encoding = cstate->opts.file_encoding;
- /*
- * Set up encoding conversion info if the file and server encodings differ
- * (see also pg_server_to_any).
- */
- if (cstate->file_encoding == GetDatabaseEncoding() ||
- cstate->file_encoding == PG_SQL_ASCII)
- cstate->need_transcoding = false;
- else
- cstate->need_transcoding = true;
+ /*
+ * Set up encoding conversion info if the file and server encodings
+ * differ (see also pg_server_to_any).
+ */
+ if (cstate_builtin->file_encoding == GetDatabaseEncoding() ||
+ cstate_builtin->file_encoding == PG_SQL_ASCII)
+ cstate_builtin->need_transcoding = false;
+ else
+ cstate_builtin->need_transcoding = true;
- /* See Multibyte encoding comment above */
- cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
+ /* See Multibyte encoding comment above */
+ cstate_builtin->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate_builtin->file_encoding);
+ }
- cstate->copy_dest = COPY_FILE; /* default */
+ cstate->copy_dest = COPY_DEST_FILE; /* default */
if (data_dest_cb)
{
progress_vals[1] = PROGRESS_COPY_TYPE_CALLBACK;
- cstate->copy_dest = COPY_CALLBACK;
+ cstate->copy_dest = COPY_DEST_CALLBACK;
cstate->data_dest_cb = data_dest_cb;
}
else if (pipe)
@@ -1146,13 +1138,14 @@ CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
static void
CopyAttributeOutText(CopyToState cstate, const char *string)
{
+ CopyToStateBuiltin cstate_builtin = (CopyToStateBuiltin) cstate;
const char *ptr;
const char *start;
char c;
char delimc = cstate->opts.delim[0];
- if (cstate->need_transcoding)
- ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
+ if (cstate_builtin->need_transcoding)
+ ptr = pg_server_to_any(string, strlen(string), cstate_builtin->file_encoding);
else
ptr = string;
@@ -1170,7 +1163,7 @@ CopyAttributeOutText(CopyToState cstate, const char *string)
* it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
* of the normal safe-encoding path.
*/
- if (cstate->encoding_embeds_ascii)
+ if (cstate_builtin->encoding_embeds_ascii)
{
start = ptr;
while ((c = *ptr) != '\0')
@@ -1225,7 +1218,7 @@ CopyAttributeOutText(CopyToState cstate, const char *string)
start = ptr++; /* we include char in next run */
}
else if (IS_HIGHBIT_SET(c))
- ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
+ ptr += pg_encoding_mblen(cstate_builtin->file_encoding, ptr);
else
ptr++;
}
@@ -1300,6 +1293,7 @@ static void
CopyAttributeOutCSV(CopyToState cstate, const char *string,
bool use_quote)
{
+ CopyToStateBuiltin cstate_builtin = (CopyToStateBuiltin) cstate;
const char *ptr;
const char *start;
char c;
@@ -1312,8 +1306,8 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
if (!use_quote && strcmp(string, cstate->opts.null_print) == 0)
use_quote = true;
- if (cstate->need_transcoding)
- ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
+ if (cstate_builtin->need_transcoding)
+ ptr = pg_server_to_any(string, strlen(string), cstate_builtin->file_encoding);
else
ptr = string;
@@ -1342,8 +1336,8 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
use_quote = true;
break;
}
- if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
- tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
+ if (IS_HIGHBIT_SET(c) && cstate_builtin->encoding_embeds_ascii)
+ tptr += pg_encoding_mblen(cstate_builtin->file_encoding, tptr);
else
tptr++;
}
@@ -1366,8 +1360,8 @@ CopyAttributeOutCSV(CopyToState cstate, const char *string,
CopySendChar(cstate, escapec);
start = ptr; /* we include char in next run */
}
- if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
- ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
+ if (IS_HIGHBIT_SET(c) && cstate_builtin->encoding_embeds_ascii)
+ ptr += pg_encoding_mblen(cstate_builtin->file_encoding, ptr);
else
ptr++;
}
diff --git a/src/include/commands/copyapi.h b/src/include/commands/copyapi.h
index 2a2d2f9876b..aece73f4ca2 100644
--- a/src/include/commands/copyapi.h
+++ b/src/include/commands/copyapi.h
@@ -15,6 +15,7 @@
#define COPYAPI_H
#include "commands/copy.h"
+#include "executor/execdesc.h"
/*
* API structure for a COPY TO format implementation. Note this must be
@@ -22,6 +23,25 @@
*/
typedef struct CopyToRoutine
{
+ /* ---
+ * Return state size for this routine.
+ *
+ * If this routine uses CopyToStateData as-is, `return
+ * sizeof(CopyToStateData)` can be used.
+ *
+ * If this routine needs additional data than CopyToStateData, a new
+ * struct based on CopyToStateData can be used something like:
+ *
+ * typedef struct MyCopyToStateDate {
+ * struct CopyToStateData parent;
+ * int define_additional_members_here;
+ * } MyCopyToStateData;
+ *
+ * In the case, this callback returns `sizeof(MyCopyToStateData)`.
+ * ---
+ */
+ size_t (*CopyToGetStateSize) (void);
+
/*
* Set output function information. This callback is called once at the
* beginning of COPY TO.
@@ -54,6 +74,52 @@ typedef struct CopyToRoutine
void (*CopyToEnd) (CopyToState cstate);
} CopyToRoutine;
+/*
+ * Represents the different dest cases we need to worry about at
+ * the bottom level
+ */
+typedef enum CopyDest
+{
+ COPY_DEST_FILE, /* to file (or a piped program) */
+ COPY_DEST_FRONTEND, /* to frontend */
+ COPY_DEST_CALLBACK, /* to callback function */
+} CopyDest;
+
+/*
+ * This struct contains the state variables used by PostgreSQL, built-in
+ * format routines and custom format routines.
+ */
+typedef struct CopyToStateData
+{
+ /* format-specific routines */
+ const CopyToRoutine *routine;
+
+ /* low-level state data */
+ CopyDest copy_dest; /* type of copy source/destination */
+ FILE *copy_file; /* used if copy_dest == COPY_FILE */
+ StringInfo fe_msgbuf; /* used for all dests during COPY TO */
+
+ /* parameters from the COPY command */
+ Relation rel; /* relation to copy to */
+ QueryDesc *queryDesc; /* executable query to copy from */
+ List *attnumlist; /* integer list of attnums to copy */
+ char *filename; /* filename, or NULL for STDOUT */
+ bool is_program; /* is 'filename' a program to popen? */
+ copy_data_dest_cb data_dest_cb; /* function for writing data */
+
+ CopyFormatOptions opts;
+ Node *whereClause; /* WHERE condition (or NULL) */
+
+ /*
+ * Working state
+ */
+ MemoryContext copycontext; /* per-copy execution context */
+
+ FmgrInfo *out_functions; /* lookup info for output functions */
+ MemoryContext rowcontext; /* per-row evaluation context */
+ uint64 bytes_processed; /* number of bytes processed so far */
+} CopyToStateData;
+
/*
* API structure for a COPY FROM format implementation. Note this must be
* allocated in a server-lifetime manner, typically as a static const struct.
--
2.51.0
pgsql-hackers by date: