From d99b59652f0b8479a5df0bc50357b5c4617f9fc2 Mon Sep 17 00:00:00 2001
From: Jacob Champion
Date: Tue, 19 Jul 2022 10:48:58 -0700
Subject: [PATCH v2 1/2] pg_clean_ascii(): escape bytes rather than lose them

Rather than replace each unprintable byte with a '?' character, replace
it with a hex escape instead. The API is now two-pass (one pass to get
the escaped length of the string, the second pass to perform the
escaping), in order to allow the use of guc_malloc'd buffers.

The GUC check hooks free the string they replace, since GUC hands the
hook ownership of a malloc'd value and only tracks the replacement.
---
 src/backend/postmaster/postmaster.c |  4 +--
 src/backend/utils/misc/guc.c        | 14 +++++++++--
 src/common/string.c                 | 38 ++++++++++++++++++++++-------
 src/include/common/string.h         |  2 +-
 4 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 1c25457526..8f5cdf4380 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -2284,9 +2284,9 @@ retry1:
 				 */
 				if (strcmp(nameptr, "application_name") == 0)
 				{
-					char	   *tmp_app_name = pstrdup(valptr);
+					char	   *tmp_app_name = palloc(pg_clean_ascii(valptr, NULL));
 
-					pg_clean_ascii(tmp_app_name);
+					pg_clean_ascii(valptr, tmp_app_name);
 
 					port->application_name = tmp_app_name;
 				}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 0328029d43..2e1a7af315 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -12480,7 +12480,12 @@ static bool
 check_application_name(char **newval, void **extra, GucSource source)
 {
 	/* Only allow clean ASCII chars in the application name */
-	pg_clean_ascii(*newval);
+	char	   *buf = guc_malloc(ERROR, pg_clean_ascii(*newval, NULL));
+
+	pg_clean_ascii(*newval, buf);
+	/* The old value was malloc'd for us; release it now that it is replaced */
+	free(*newval);
+	*newval = buf;
 
 	return true;
 }
@@ -12496,7 +12501,12 @@ static bool
 check_cluster_name(char **newval, void **extra, GucSource source)
 {
 	/* Only allow clean ASCII chars in the cluster name */
-	pg_clean_ascii(*newval);
+	char	   *buf = guc_malloc(ERROR, pg_clean_ascii(*newval, NULL));
+
+	pg_clean_ascii(*newval, buf);
+	/* The old value was malloc'd for us; release it now that it is replaced */
+	free(*newval);
+	*newval = buf;
 
 	return true;
 }
diff --git a/src/common/string.c b/src/common/string.c
index 16940d1fa7..82a8afa4a9 100644
--- a/src/common/string.c
+++ b/src/common/string.c
@@ -22,6 +22,7 @@
 #endif
 
 #include "common/string.h"
+#include "lib/stringinfo.h"
 
 
 /*
@@ -59,9 +60,11 @@ strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
 
 
 /*
- * pg_clean_ascii -- Replace any non-ASCII chars with a '?' char
+ * pg_clean_ascii -- Replace any non-ASCII chars with a "\xXX" string
  *
- * Modifies the string passed in which must be '\0'-terminated.
+ * Puts an escaped copy into the dst buffer, which must be at least as big as
+ * the return value of pg_clean_ascii(src, NULL). The input string must be
+ * '\0'-terminated.
  *
  * This function exists specifically to deal with filtering out
  * non-ASCII characters in a few places where the client can provide an almost
@@ -73,22 +76,39 @@ strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base)
  * In general, this function should NOT be used- instead, consider how to handle
  * the string without needing to filter out the non-ASCII characters.
  *
- * Ultimately, we'd like to improve the situation to not require stripping out
- * all non-ASCII but perform more intelligent filtering which would allow UTF or
+ * Ultimately, we'd like to improve the situation to not require replacing all
+ * non-ASCII but perform more intelligent filtering which would allow UTF or
  * similar, but it's unclear exactly what we should allow, so stick to ASCII only
  * for now.
  */
-void
-pg_clean_ascii(char *str)
+size_t
+pg_clean_ascii(const char *str, char *dst)
 {
-	/* Only allow clean ASCII chars in the string */
-	char	   *p;
+	const char *p;
+	size_t		i = 0;
 
 	for (p = str; *p != '\0'; p++)
 	{
+		/* Only allow clean ASCII chars in the string */
 		if (*p < 32 || *p > 126)
-			*p = '?';
+		{
+			if (dst)
+				snprintf(&dst[i], 5, "\\x%02x", (unsigned char) *p);
+			i += 4;
+		}
+		else
+		{
+			if (dst)
+				dst[i] = *p;
+			i++;
+		}
 	}
+
+	if (dst)
+		dst[i] = '\0';
+	i++;
+
+	return i;
 }
 
 
diff --git a/src/include/common/string.h b/src/include/common/string.h
index cf00fb53cd..0466da640c 100644
--- a/src/include/common/string.h
+++ b/src/include/common/string.h
@@ -24,7 +24,7 @@ typedef struct PromptInterruptContext
 extern bool pg_str_endswith(const char *str, const char *end);
 extern int	strtoint(const char *pg_restrict str, char **pg_restrict endptr,
 					 int base);
-extern void pg_clean_ascii(char *str);
+extern size_t pg_clean_ascii(const char *str, char *dst);
 extern int	pg_strip_crlf(char *str);
 extern bool pg_is_ascii(const char *str);
 
-- 
2.25.1