From ce06c03e2b5c50675d0bd8961c858bd0165cfabd Mon Sep 17 00:00:00 2001 From: Jacob Champion Date: Mon, 3 May 2021 15:38:26 -0700 Subject: [PATCH v19 1/9] common/jsonapi: support FRONTEND clients Based on a patch by Michael Paquier. For frontend code, use PQExpBuffer instead of StringInfo. This requires us to track allocation failures so that we can return JSON_OUT_OF_MEMORY as needed. json_errdetail() now allocates its error message inside memory owned by the JsonLexContext, so clients don't need to worry about freeing it. We can now partially revert b44669b2ca, now that json_errdetail() works correctly. Co-authored-by: Daniel Gustafsson --- src/bin/pg_verifybackup/t/005_bad_manifest.pl | 3 +- src/common/Makefile | 2 +- src/common/jsonapi.c | 268 +++++++++++++----- src/common/meson.build | 8 +- src/common/parse_manifest.c | 2 +- src/common/stringinfo.c | 7 + src/include/common/jsonapi.h | 18 +- src/include/lib/stringinfo.h | 2 + 8 files changed, 225 insertions(+), 85 deletions(-) diff --git a/src/bin/pg_verifybackup/t/005_bad_manifest.pl b/src/bin/pg_verifybackup/t/005_bad_manifest.pl index e278ccea5a..e2a297930e 100644 --- a/src/bin/pg_verifybackup/t/005_bad_manifest.pl +++ b/src/bin/pg_verifybackup/t/005_bad_manifest.pl @@ -13,7 +13,8 @@ use Test::More; my $tempdir = PostgreSQL::Test::Utils::tempdir; test_bad_manifest('input string ended unexpectedly', - qr/could not parse backup manifest: parsing failed/, <input_encoding = encoding; if (need_escapes) { - lex->strval = makeStringInfo(); + /* + * This call can fail in FRONTEND code. We defer error handling to + * time of use (json_lex_string()) since we might not need to parse + * any strings anyway. + */ + lex->strval = createStrVal(); lex->flags |= JSONLEX_FREE_STRVAL; + lex->parse_strval = true; } + lex->errormsg = NULL; return lex; } @@ -182,13 +222,18 @@ makeJsonLexContextCstringLen(JsonLexContext *lex, char *json, void freeJsonLexContext(JsonLexContext *lex) { + static const JsonLexContext empty = {0}; + if (lex->flags & JSONLEX_FREE_STRVAL) - { - pfree(lex->strval->data); - pfree(lex->strval); - } + destroyStrVal(lex->strval); + + if (lex->errormsg) + destroyStrVal(lex->errormsg); + if (lex->flags & JSONLEX_FREE_STRUCT) pfree(lex); + else + *lex = empty; } /* @@ -254,7 +299,7 @@ json_count_array_elements(JsonLexContext *lex, int *elements) * etc, so doing this with a copy makes that safe. */ memcpy(©lex, lex, sizeof(JsonLexContext)); - copylex.strval = NULL; /* not interested in values here */ + copylex.parse_strval = false; /* not interested in values here */ copylex.lex_level++; count = 0; @@ -316,14 +361,21 @@ parse_scalar(JsonLexContext *lex, JsonSemAction *sem) /* extract the de-escaped string value, or the raw lexeme */ if (lex_peek(lex) == JSON_TOKEN_STRING) { - if (lex->strval != NULL) - val = pstrdup(lex->strval->data); + if (lex->parse_strval) + { + val = STRDUP(lex->strval->data); + if (val == NULL) + return JSON_OUT_OF_MEMORY; + } } else { int len = (lex->token_terminator - lex->token_start); - val = palloc(len + 1); + val = ALLOC(len + 1); + if (val == NULL) + return JSON_OUT_OF_MEMORY; + memcpy(val, lex->token_start, len); val[len] = '\0'; } @@ -357,8 +409,12 @@ parse_object_field(JsonLexContext *lex, JsonSemAction *sem) if (lex_peek(lex) != JSON_TOKEN_STRING) return report_parse_error(JSON_PARSE_STRING, lex); - if ((ostart != NULL || oend != NULL) && lex->strval != NULL) - fname = pstrdup(lex->strval->data); + if ((ostart != NULL || oend != NULL) && lex->parse_strval) + { + fname = STRDUP(lex->strval->data); + if (fname == NULL) + return JSON_OUT_OF_MEMORY; + } result = json_lex(lex); if (result != JSON_SUCCESS) return result; @@ -414,6 +470,11 @@ parse_object(JsonLexContext *lex, JsonSemAction *sem) JsonParseErrorType result; #ifndef FRONTEND + + /* + * TODO: clients need some way to put a bound on stack growth. Parse level + * limits maybe? + */ check_stack_depth(); #endif @@ -762,8 +823,15 @@ json_lex_string(JsonLexContext *lex) return code; \ } while (0) - if (lex->strval != NULL) - resetStringInfo(lex->strval); + if (lex->parse_strval) + { +#ifdef FRONTEND + /* make sure initialization succeeded */ + if (lex->strval == NULL) + return JSON_OUT_OF_MEMORY; +#endif + resetStrVal(lex->strval); + } Assert(lex->input_length > 0); s = lex->token_start; @@ -800,7 +868,7 @@ json_lex_string(JsonLexContext *lex) else FAIL_AT_CHAR_END(JSON_UNICODE_ESCAPE_FORMAT); } - if (lex->strval != NULL) + if (lex->parse_strval) { /* * Combine surrogate pairs. @@ -857,19 +925,19 @@ json_lex_string(JsonLexContext *lex) unicode_to_utf8(ch, (unsigned char *) utf8str); utf8len = pg_utf_mblen((unsigned char *) utf8str); - appendBinaryStringInfo(lex->strval, utf8str, utf8len); + appendBinaryPQExpBuffer(lex->strval, utf8str, utf8len); } else if (ch <= 0x007f) { /* The ASCII range is the same in all encodings */ - appendStringInfoChar(lex->strval, (char) ch); + appendPQExpBufferChar(lex->strval, (char) ch); } else FAIL_AT_CHAR_END(JSON_UNICODE_HIGH_ESCAPE); #endif /* FRONTEND */ } } - else if (lex->strval != NULL) + else if (lex->parse_strval) { if (hi_surrogate != -1) FAIL_AT_CHAR_END(JSON_UNICODE_LOW_SURROGATE); @@ -879,22 +947,22 @@ json_lex_string(JsonLexContext *lex) case '"': case '\\': case '/': - appendStringInfoChar(lex->strval, *s); + appendStrValChar(lex->strval, *s); break; case 'b': - appendStringInfoChar(lex->strval, '\b'); + appendStrValChar(lex->strval, '\b'); break; case 'f': - appendStringInfoChar(lex->strval, '\f'); + appendStrValChar(lex->strval, '\f'); break; case 'n': - appendStringInfoChar(lex->strval, '\n'); + appendStrValChar(lex->strval, '\n'); break; case 'r': - appendStringInfoChar(lex->strval, '\r'); + appendStrValChar(lex->strval, '\r'); break; case 't': - appendStringInfoChar(lex->strval, '\t'); + appendStrValChar(lex->strval, '\t'); break; default: @@ -929,7 +997,7 @@ json_lex_string(JsonLexContext *lex) /* * Skip to the first byte that requires special handling, so we - * can batch calls to appendBinaryStringInfo. + * can batch calls to appendBinaryStrVal. */ while (p < end - sizeof(Vector8) && !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) && @@ -953,8 +1021,8 @@ json_lex_string(JsonLexContext *lex) } } - if (lex->strval != NULL) - appendBinaryStringInfo(lex->strval, s, p - s); + if (lex->parse_strval) + appendBinaryStrVal(lex->strval, s, p - s); /* * s will be incremented at the top of the loop, so set it to just @@ -970,6 +1038,11 @@ json_lex_string(JsonLexContext *lex) return JSON_UNICODE_LOW_SURROGATE; } +#ifdef FRONTEND + if (lex->parse_strval && PQExpBufferBroken(lex->strval)) + return JSON_OUT_OF_MEMORY; +#endif + /* Hooray, we found the end of the string! */ lex->prev_token_terminator = lex->token_terminator; lex->token_terminator = s + 1; @@ -1145,72 +1218,93 @@ report_parse_error(JsonParseContext ctx, JsonLexContext *lex) return JSON_SUCCESS; /* silence stupider compilers */ } - -#ifndef FRONTEND -/* - * Extract the current token from a lexing context, for error reporting. - */ -static char * -extract_token(JsonLexContext *lex) -{ - int toklen = lex->token_terminator - lex->token_start; - char *token = palloc(toklen + 1); - - memcpy(token, lex->token_start, toklen); - token[toklen] = '\0'; - return token; -} - /* * Construct an (already translated) detail message for a JSON error. * - * Note that the error message generated by this routine may not be - * palloc'd, making it unsafe for frontend code as there is no way to - * know if this can be safely pfree'd or not. + * The returned allocation is either static or owned by the JsonLexContext and + * should not be freed. */ char * json_errdetail(JsonParseErrorType error, JsonLexContext *lex) { + int toklen = lex->token_terminator - lex->token_start; + + if (error == JSON_OUT_OF_MEMORY) + { + /* Short circuit. Allocating anything for this case is unhelpful. */ + return _("out of memory"); + } + + if (lex->errormsg) + resetStrVal(lex->errormsg); + else + lex->errormsg = createStrVal(); + switch (error) { case JSON_SUCCESS: /* fall through to the error code after switch */ break; case JSON_ESCAPING_INVALID: - return psprintf(_("Escape sequence \"\\%s\" is invalid."), - extract_token(lex)); + appendStrVal(lex->errormsg, + _("Escape sequence \"\\%.*s\" is invalid."), + toklen, lex->token_start); + break; case JSON_ESCAPING_REQUIRED: - return psprintf(_("Character with value 0x%02x must be escaped."), - (unsigned char) *(lex->token_terminator)); + appendStrVal(lex->errormsg, + _("Character with value 0x%02x must be escaped."), + (unsigned char) *(lex->token_terminator)); + break; case JSON_EXPECTED_END: - return psprintf(_("Expected end of input, but found \"%s\"."), - extract_token(lex)); + appendStrVal(lex->errormsg, + _("Expected end of input, but found \"%.*s\"."), + toklen, lex->token_start); + break; case JSON_EXPECTED_ARRAY_FIRST: - return psprintf(_("Expected array element or \"]\", but found \"%s\"."), - extract_token(lex)); + appendStrVal(lex->errormsg, + _("Expected array element or \"]\", but found \"%.*s\"."), + toklen, lex->token_start); + break; case JSON_EXPECTED_ARRAY_NEXT: - return psprintf(_("Expected \",\" or \"]\", but found \"%s\"."), - extract_token(lex)); + appendStrVal(lex->errormsg, + _("Expected \",\" or \"]\", but found \"%.*s\"."), + toklen, lex->token_start); + break; case JSON_EXPECTED_COLON: - return psprintf(_("Expected \":\", but found \"%s\"."), - extract_token(lex)); + appendStrVal(lex->errormsg, + _("Expected \":\", but found \"%.*s\"."), + toklen, lex->token_start); + break; case JSON_EXPECTED_JSON: - return psprintf(_("Expected JSON value, but found \"%s\"."), - extract_token(lex)); + appendStrVal(lex->errormsg, + _("Expected JSON value, but found \"%.*s\"."), + toklen, lex->token_start); + break; case JSON_EXPECTED_MORE: return _("The input string ended unexpectedly."); case JSON_EXPECTED_OBJECT_FIRST: - return psprintf(_("Expected string or \"}\", but found \"%s\"."), - extract_token(lex)); + appendStrVal(lex->errormsg, + _("Expected string or \"}\", but found \"%.*s\"."), + toklen, lex->token_start); + break; case JSON_EXPECTED_OBJECT_NEXT: - return psprintf(_("Expected \",\" or \"}\", but found \"%s\"."), - extract_token(lex)); + appendStrVal(lex->errormsg, + _("Expected \",\" or \"}\", but found \"%.*s\"."), + toklen, lex->token_start); + break; case JSON_EXPECTED_STRING: - return psprintf(_("Expected string, but found \"%s\"."), - extract_token(lex)); + appendStrVal(lex->errormsg, + _("Expected string, but found \"%.*s\"."), + toklen, lex->token_start); + break; case JSON_INVALID_TOKEN: - return psprintf(_("Token \"%s\" is invalid."), - extract_token(lex)); + appendStrVal(lex->errormsg, + _("Token \"%.*s\" is invalid."), + toklen, lex->token_start); + break; + case JSON_OUT_OF_MEMORY: + /* should have been handled above; use the error path */ + break; case JSON_UNICODE_CODE_POINT_ZERO: return _("\\u0000 cannot be converted to text."); case JSON_UNICODE_ESCAPE_FORMAT: @@ -1219,9 +1313,19 @@ json_errdetail(JsonParseErrorType error, JsonLexContext *lex) /* note: this case is only reachable in frontend not backend */ return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8."); case JSON_UNICODE_UNTRANSLATABLE: - /* note: this case is only reachable in backend not frontend */ + + /* + * note: this case is only reachable in backend not frontend. + * #ifdef it away so the frontend doesn't try to link against + * backend functionality. + */ +#ifndef FRONTEND return psprintf(_("Unicode escape value could not be translated to the server's encoding %s."), GetDatabaseEncodingName()); +#else + Assert(false); + break; +#endif case JSON_UNICODE_HIGH_SURROGATE: return _("Unicode high surrogate must not follow a high surrogate."); case JSON_UNICODE_LOW_SURROGATE: @@ -1231,12 +1335,22 @@ json_errdetail(JsonParseErrorType error, JsonLexContext *lex) break; } - /* - * We don't use a default: case, so that the compiler will warn about - * unhandled enum values. But this needs to be here anyway to cover the - * possibility of an incorrect input. - */ - elog(ERROR, "unexpected json parse error type: %d", (int) error); - return NULL; -} + /* Note that lex->errormsg can be NULL in FRONTEND code. */ + if (lex->errormsg && !lex->errormsg->data[0]) + { + /* + * We don't use a default: case, so that the compiler will warn about + * unhandled enum values. But this needs to be here anyway to cover + * the possibility of an incorrect input. + */ + appendStrVal(lex->errormsg, + "unexpected json parse error type: %d", (int) error); + } + +#ifdef FRONTEND + if (PQExpBufferBroken(lex->errormsg)) + return _("out of memory while constructing error description"); #endif + + return lex->errormsg->data; +} diff --git a/src/common/meson.build b/src/common/meson.build index 4eb16024cb..5d2c7abaa6 100644 --- a/src/common/meson.build +++ b/src/common/meson.build @@ -124,13 +124,18 @@ common_sources_frontend_static += files( # least cryptohash_openssl.c, hmac_openssl.c depend on it. # controldata_utils.c depends on wait_event_types_h. That's arguably a # layering violation, but ... +# +# XXX Frontend builds need libpq's pqexpbuffer.h, so adjust the include paths +# appropriately. This seems completely broken. pgcommon = {} pgcommon_variants = { '_srv': internal_lib_args + { + 'include_directories': include_directories('.'), 'sources': common_sources + [lwlocknames_h] + [wait_event_types_h], 'dependencies': [backend_common_code], }, '': default_lib_args + { + 'include_directories': include_directories('../interfaces/libpq', '.'), 'sources': common_sources_frontend_static, 'dependencies': [frontend_common_code], # Files in libpgcommon.a should use/export the "xxx_private" versions @@ -139,6 +144,7 @@ pgcommon_variants = { }, '_shlib': default_lib_args + { 'pic': true, + 'include_directories': include_directories('../interfaces/libpq', '.'), 'sources': common_sources_frontend_shlib, 'dependencies': [frontend_common_code], }, @@ -156,7 +162,6 @@ foreach name, opts : pgcommon_variants c_args = opts.get('c_args', []) + common_cflags[cflagname] cflag_libs += static_library('libpgcommon@0@_@1@'.format(name, cflagname), c_pch: pch_c_h, - include_directories: include_directories('.'), kwargs: opts + { 'sources': sources, 'c_args': c_args, @@ -169,7 +174,6 @@ foreach name, opts : pgcommon_variants lib = static_library('libpgcommon@0@'.format(name), link_with: cflag_libs, c_pch: pch_c_h, - include_directories: include_directories('.'), kwargs: opts + { 'dependencies': opts['dependencies'] + [ssl], } diff --git a/src/common/parse_manifest.c b/src/common/parse_manifest.c index 92a97714f3..62d93989be 100644 --- a/src/common/parse_manifest.c +++ b/src/common/parse_manifest.c @@ -147,7 +147,7 @@ json_parse_manifest(JsonManifestParseContext *context, char *buffer, /* Run the actual JSON parser. */ json_error = pg_parse_json(lex, &sem); if (json_error != JSON_SUCCESS) - json_manifest_parse_failure(context, "parsing failed"); + json_manifest_parse_failure(context, json_errdetail(json_error, lex)); if (parse.state != JM_EXPECT_EOF) json_manifest_parse_failure(context, "manifest ended unexpectedly"); diff --git a/src/common/stringinfo.c b/src/common/stringinfo.c index c61d5c58f3..09419f6042 100644 --- a/src/common/stringinfo.c +++ b/src/common/stringinfo.c @@ -350,3 +350,10 @@ enlargeStringInfo(StringInfo str, int needed) str->maxlen = newlen; } + +void +destroyStringInfo(StringInfo str) +{ + pfree(str->data); + pfree(str); +} diff --git a/src/include/common/jsonapi.h b/src/include/common/jsonapi.h index 02943cdad8..75d444c17a 100644 --- a/src/include/common/jsonapi.h +++ b/src/include/common/jsonapi.h @@ -14,8 +14,6 @@ #ifndef JSONAPI_H #define JSONAPI_H -#include "lib/stringinfo.h" - typedef enum JsonTokenType { JSON_TOKEN_INVALID, @@ -48,6 +46,7 @@ typedef enum JsonParseErrorType JSON_EXPECTED_OBJECT_NEXT, JSON_EXPECTED_STRING, JSON_INVALID_TOKEN, + JSON_OUT_OF_MEMORY, JSON_UNICODE_CODE_POINT_ZERO, JSON_UNICODE_ESCAPE_FORMAT, JSON_UNICODE_HIGH_ESCAPE, @@ -57,6 +56,17 @@ typedef enum JsonParseErrorType JSON_SEM_ACTION_FAILED, /* error should already be reported */ } JsonParseErrorType; +/* + * Don't depend on the internal type header for strval; if callers need access + * then they can include the appropriate header themselves. + */ +#ifdef FRONTEND +#define StrValType PQExpBufferData +#else +#define StrValType StringInfoData +#endif + +typedef struct StrValType StrValType; /* * All the fields in this structure should be treated as read-only. @@ -88,7 +98,9 @@ typedef struct JsonLexContext bits32 flags; int line_number; /* line number, starting from 1 */ char *line_start; /* where that line starts within input */ - StringInfo strval; + bool parse_strval; + StrValType *strval; /* only used if parse_strval == true */ + StrValType *errormsg; } JsonLexContext; typedef JsonParseErrorType (*json_struct_action) (void *state); diff --git a/src/include/lib/stringinfo.h b/src/include/lib/stringinfo.h index 2cd636b01c..64ec6419af 100644 --- a/src/include/lib/stringinfo.h +++ b/src/include/lib/stringinfo.h @@ -233,4 +233,6 @@ extern void appendBinaryStringInfoNT(StringInfo str, */ extern void enlargeStringInfo(StringInfo str, int needed); + +extern void destroyStringInfo(StringInfo str); #endif /* STRINGINFO_H */ -- 2.34.1