From ad050583d3c14bdec44266d8d2110b384fa9d7dc Mon Sep 17 00:00:00 2001 From: Nazir Bilal Yavuz Date: Tue, 14 Oct 2025 13:18:13 +0300 Subject: [PATCH v3 2/2] COPY SIMD per-line heuristic --- src/include/commands/copyfrom_internal.h | 7 ++ src/backend/commands/copyfrom.c | 6 ++ src/backend/commands/copyfromparse.c | 82 ++++++++++++++++++++++-- 3 files changed, 89 insertions(+), 6 deletions(-) diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h index c8b22af22d8..9dd31320f52 100644 --- a/src/include/commands/copyfrom_internal.h +++ b/src/include/commands/copyfrom_internal.h @@ -89,6 +89,13 @@ typedef struct CopyFromStateData const char *cur_attval; /* current att value for error messages */ bool relname_only; /* don't output line number, att, etc. */ + /* SIMD variables */ + bool simd_continue; + bool simd_initialized; + uint16 simd_last_sleep_cycle; + uint16 simd_current_sleep_cycle; + + /* * Working state */ diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 12781963b4f..4bdfd96c244 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -1721,6 +1721,12 @@ BeginCopyFrom(ParseState *pstate, cstate->cur_attval = NULL; cstate->relname_only = false; + /* Initialize SIMD variables */ + cstate->simd_continue = false; + cstate->simd_initialized = false; + cstate->simd_current_sleep_cycle = 0; + cstate->simd_last_sleep_cycle = 0; + /* * Allocate buffers for the input pipeline. 
* diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 99959a40fab..24cef54e5e4 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -143,12 +143,14 @@ static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0"; /* non-export function prototypes */ static bool CopyReadLine(CopyFromState cstate, bool is_csv); -static bool CopyReadLineText(CopyFromState cstate, bool is_csv); static int CopyReadAttributesText(CopyFromState cstate); static int CopyReadAttributesCSV(CopyFromState cstate); static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo, Oid typioparam, int32 typmod, bool *isnull); +static pg_attribute_always_inline bool CopyReadLineText(CopyFromState cstate, + bool is_csv, + bool simd_continue); static pg_attribute_always_inline bool CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values, @@ -1173,8 +1175,23 @@ CopyReadLine(CopyFromState cstate, bool is_csv) resetStringInfo(&cstate->line_buf); cstate->line_buf_valid = false; - /* Parse data and transfer into line_buf */ - result = CopyReadLineText(cstate, is_csv); + /* If this is the first read, initialize the SIMD state */ + if (unlikely(!cstate->simd_initialized)) + { + cstate->simd_initialized = true; + cstate->simd_continue = true; + cstate->simd_current_sleep_cycle = 0; + cstate->simd_last_sleep_cycle = 0; + } + + /* + * Parse data and transfer into line_buf. To benefit from inlining, + * call CopyReadLineText() with constant boolean arguments.
+ */ + if (cstate->simd_continue) + result = CopyReadLineText(cstate, is_csv, true); + else + result = CopyReadLineText(cstate, is_csv, false); if (result) { @@ -1241,8 +1258,8 @@ CopyReadLine(CopyFromState cstate, bool is_csv) /* * CopyReadLineText - inner loop of CopyReadLine for text mode */ -static bool -CopyReadLineText(CopyFromState cstate, bool is_csv) +static pg_attribute_always_inline bool +CopyReadLineText(CopyFromState cstate, bool is_csv, bool simd_continue) { char *copy_input_buf; int input_buf_ptr; @@ -1258,11 +1275,16 @@ CopyReadLineText(CopyFromState cstate, bool is_csv) char escapec = '\0'; #ifndef USE_NO_SIMD +#define SIMD_SLEEP_MAX 1024 +#define SIMD_ADVANCE_AT_LEAST 5 Vector8 nl = vector8_broadcast('\n'); Vector8 cr = vector8_broadcast('\r'); Vector8 bs = vector8_broadcast('\\'); Vector8 quote = vector8_broadcast(0); Vector8 escape = vector8_broadcast(0); + + uint64 simd_total_cycle = 0; + uint64 simd_total_advance = 0; #endif if (is_csv) @@ -1358,12 +1380,14 @@ CopyReadLineText(CopyFromState cstate, bool is_csv) * sequentially. - The remaining buffer is smaller than one vector * width (sizeof(Vector8)); SIMD operates on fixed-size chunks. 
*/ - if (!last_was_esc && copy_buf_len - input_buf_ptr >= sizeof(Vector8)) + if (simd_continue && !last_was_esc && copy_buf_len - input_buf_ptr >= sizeof(Vector8)) { Vector8 chunk; Vector8 match = vector8_broadcast(0); uint32 mask; + simd_total_cycle++; + /* Load a chunk of data into a vector register */ vector8_load(&chunk, (const uint8 *) &copy_input_buf[input_buf_ptr]); @@ -1391,11 +1415,13 @@ CopyReadLineText(CopyFromState cstate, bool is_csv) int advance = pg_rightmost_one_pos32(mask); input_buf_ptr += advance; + simd_total_advance += advance; } else { /* No special characters found, so skip the entire chunk */ input_buf_ptr += sizeof(Vector8); + simd_total_advance += sizeof(Vector8); continue; } } @@ -1603,6 +1629,50 @@ CopyReadLineText(CopyFromState cstate, bool is_csv) } } /* end of outer loop */ +#ifndef USE_NO_SIMD + + /* SIMD was enabled */ + if (simd_continue) + { + /* SIMD is worthwhile */ + if (simd_total_cycle && simd_total_advance / simd_total_cycle >= SIMD_ADVANCE_AT_LEAST) + { + Assert(cstate->simd_current_sleep_cycle == 0); + cstate->simd_last_sleep_cycle >>= 1; + } + /* SIMD was enabled but it isn't worthwhile */ + else + { + uint16 simd_last_sleep_cycle = cstate->simd_last_sleep_cycle; + + cstate->simd_continue = false; + + if (simd_last_sleep_cycle == 0) + simd_last_sleep_cycle = 1; + else if (simd_last_sleep_cycle >= SIMD_SLEEP_MAX / 2) + simd_last_sleep_cycle = SIMD_SLEEP_MAX; + else + simd_last_sleep_cycle <<= 1; + cstate->simd_current_sleep_cycle = simd_last_sleep_cycle; + cstate->simd_last_sleep_cycle = simd_last_sleep_cycle; + } + } + /* SIMD was disabled */ + else + { + /* + * We should only get here while counting + * cstate->simd_current_sleep_cycle down from a positive number. + */ + Assert(cstate->simd_current_sleep_cycle != 0); + cstate->simd_current_sleep_cycle--; + + if (cstate->simd_current_sleep_cycle == 0) + cstate->simd_continue = true; + } + +#endif + /* * Transfer any still-uncopied data to line_buf. */ -- 2.51.0