From a9411516dcdcf4c91a9f88dd05741a1479dec7a3 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 4 Feb 2021 21:36:46 +0200 Subject: [PATCH v2 1/1] Fix a corner-case in COPY FROM backslash processing. If a multi-byte character is escaped with a backslash in TEXT mode input, we didn't always treat the escape correctly. If: - a multi-byte character is escaped with a backslash, and - the second byte of the character is 0x5C, i.e. the ASCII code of a backslash (\), and - the next character is a dot (.), then the CopyReadLineText function would incorrectly interpret the sequence as an end-of-copy marker (\.). This can only happen in encodings that can "embed" ASCII characters as the second byte. One example of such a sequence is '\x5ca45c2e666f6f' in Big5 encoding. If you put that in a file, and load it with COPY FROM, you'd incorrectly get an "end-of-copy marker corrupt" error. Backpatch to all supported versions. Reviewed-by: John Naylor, Kyotaro Horiguchi Discussion: https://www.postgresql.org/message-id/a897f84f-8dca-8798-3139-07da5bb38728%40iki.fi --- src/backend/commands/copyfromparse.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index b843d315b1..315b16fd7a 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -1084,7 +1084,7 @@ CopyReadLineText(CopyFromState cstate) break; } else if (!cstate->opts.csv_mode) - + { /* * If we are here, it means we found a backslash followed by * something other than a period. In non-CSV mode, anything @@ -1095,8 +1095,16 @@ CopyReadLineText(CopyFromState cstate) * backslashes are not special, so we want to process the * character after the backslash just like a normal character, * so we don't increment in those cases. + * + * Set 'c' to skip whole character correctly in multi-byte + * encodings. 
If we don't have the whole character in the + * buffer yet, we might loop back to process it, after all, + * but that's OK because multi-byte characters cannot have any + * special meaning. */ raw_buf_ptr++; + c = c2; + } } /* -- 2.30.0