From dfd76f4dbcf3834371442d593d315797762bbd11 Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Tue, 19 Jul 2022 10:48:58 -0700 Subject: [PATCH v3 1/2] pg_clean_ascii(): escape bytes rather than lose them Rather than replace each unprintable byte with a '?' character, replace it with a hex escape instead. The API now allocates a copy rather than modifying the input in place. --- src/backend/postmaster/postmaster.c | 6 +----- src/backend/utils/misc/guc.c | 4 ++-- src/common/string.c | 26 +++++++++++++++++--------- src/include/common/string.h | 2 +- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 1c25457526..5e8cd770c0 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -2284,11 +2284,7 @@ retry1: */ if (strcmp(nameptr, "application_name") == 0) { - char *tmp_app_name = pstrdup(valptr); - - pg_clean_ascii(tmp_app_name); - - port->application_name = tmp_app_name; + port->application_name = pg_clean_ascii(valptr); } } offset = valoffset + strlen(valptr) + 1; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 0328029d43..60400752e5 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -12480,7 +12480,7 @@ static bool check_application_name(char **newval, void **extra, GucSource source) { /* Only allow clean ASCII chars in the application name */ - pg_clean_ascii(*newval); + *newval = guc_strdup(ERROR, pg_clean_ascii(*newval)); return true; } @@ -12496,7 +12496,7 @@ static bool check_cluster_name(char **newval, void **extra, GucSource source) { /* Only allow clean ASCII chars in the cluster name */ - pg_clean_ascii(*newval); + *newval = guc_strdup(ERROR, pg_clean_ascii(*newval)); return true; } diff --git a/src/common/string.c b/src/common/string.c index 16940d1fa7..db15324c62 100644 --- a/src/common/string.c +++ b/src/common/string.c @@ -22,6 +22,7 @@ #endif #include 
"common/string.h" +#include "lib/stringinfo.h" /* @@ -59,9 +60,9 @@ strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base) /* - * pg_clean_ascii -- Replace any non-ASCII chars with a '?' char + * pg_clean_ascii -- Escape unprintable and non-ASCII bytes as "\xXX" * - * Modifies the string passed in which must be '\0'-terminated. + * Makes a palloc'd copy of the string passed in, which must be '\0'-terminated. * * This function exists specifically to deal with filtering out * non-ASCII characters in a few places where the client can provide an almost @@ -73,22 +74,29 @@ strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base) * In general, this function should NOT be used- instead, consider how to handle * the string without needing to filter out the non-ASCII characters. * - * Ultimately, we'd like to improve the situation to not require stripping out - * all non-ASCII but perform more intelligent filtering which would allow UTF or + * Ultimately, we'd like to improve the situation to not require replacing all + * non-ASCII but perform more intelligent filtering which would allow UTF or * similar, but it's unclear exactly what we should allow, so stick to ASCII only * for now. 
*/ -void -pg_clean_ascii(char *str) +char * +pg_clean_ascii(const char *str) { - /* Only allow clean ASCII chars in the string */ - char *p; + StringInfoData buf; + const char *p; + + initStringInfo(&buf); for (p = str; *p != '\0'; p++) { + /* Only allow clean ASCII chars in the string */ if (*p < 32 || *p > 126) - *p = '?'; + appendStringInfo(&buf, "\\x%02x", (unsigned char) *p); + else + appendStringInfoChar(&buf, *p); } + + return buf.data; } diff --git a/src/include/common/string.h b/src/include/common/string.h index cf00fb53cd..d10d0c9cbf 100644 --- a/src/include/common/string.h +++ b/src/include/common/string.h @@ -24,7 +24,7 @@ typedef struct PromptInterruptContext extern bool pg_str_endswith(const char *str, const char *end); extern int strtoint(const char *pg_restrict str, char **pg_restrict endptr, int base); -extern void pg_clean_ascii(char *str); +extern char *pg_clean_ascii(const char *str); extern int pg_strip_crlf(char *str); extern bool pg_is_ascii(const char *str); -- 2.25.1