From 86d0bcfe2bb6752c3cf773d28eb7c201cb41bdf0 Mon Sep 17 00:00:00 2001 From: Andrew Dunstan Date: Thu, 22 Feb 2024 03:04:41 -0500 Subject: [PATCH v8 5/5] fixes for non-null terminated inputs for incremental json parsing The incremental lexer must not depend on a terminating NUL in the chunk it is handed. Replace the strspn() scan for numeric continuations with a loop bounded by input_length, accepting the same character set as before (including '.'), and replace appendStringInfoString() with appendBinaryStringInfo() plus an explicit length when saving a partial token. The manifest readers and the test module now write "1+23 trailing junk" directly after each chunk to exercise non-null-terminated input. --- src/bin/pg_combinebackup/load_manifest.c | 5 ++- src/bin/pg_verifybackup/pg_verifybackup.c | 5 ++- src/common/jsonapi.c | 44 ++++++++++++++----- .../test_json_parser_incremental.c | 5 ++- 4 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/bin/pg_combinebackup/load_manifest.c b/src/bin/pg_combinebackup/load_manifest.c index 982be78e28..ae73d01190 100644 --- a/src/bin/pg_combinebackup/load_manifest.c +++ b/src/bin/pg_combinebackup/load_manifest.c @@ -172,7 +172,7 @@ load_backup_manifest(char *backup_directory) inc_state = json_parse_manifest_incremental_init(&context); - buffer = pg_malloc(chunk_size + 1); + buffer = pg_malloc(chunk_size + 64); while (bytes_left > 0) { @@ -188,7 +188,6 @@ load_backup_manifest(char *backup_directory) else if (bytes_left < 2 * chunk_size) bytes_to_read = bytes_left / 2; rc = read(fd, buffer, bytes_to_read); - buffer[rc] = '\0'; /* useful for writing log traces */ if (rc != bytes_to_read) { if (rc < 0) @@ -199,6 +198,8 @@ load_backup_manifest(char *backup_directory) (long long int)(statbuf.st_size + rc - bytes_left), (long long int) statbuf.st_size); } + /* exercise non-null-terminated chunks */ + strcpy(buffer + rc, "1+23 trailing junk"); bytes_left -= rc; json_parse_manifest_incremental_chunk( inc_state, buffer, rc, bytes_left == 0); diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c index 02b160f9fc..6eaa376bf0 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.c +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -453,7 +453,7 @@ parse_manifest_file(char *manifest_path, manifest_files_hash **ht_p, inc_state = json_parse_manifest_incremental_init(&context); - buffer = pg_malloc(chunk_size + 1); + buffer = pg_malloc(chunk_size + 64); while
(bytes_left > 0) { @@ -469,7 +469,6 @@ parse_manifest_file(char *manifest_path, manifest_files_hash **ht_p, else if (bytes_left < 2 * chunk_size) bytes_to_read = bytes_left / 2; rc = read(fd, buffer, bytes_to_read); - buffer[rc] = '\0'; /* useful for writing log traces */ if (rc != bytes_to_read) { if (rc < 0) @@ -480,6 +479,8 @@ parse_manifest_file(char *manifest_path, manifest_files_hash **ht_p, (long long int)(statbuf.st_size + rc - bytes_left), (long long int) statbuf.st_size); } + /* test for non-null terminated chunk */ + strcpy(buffer + rc, "1+23 trailing junk"); bytes_left -= rc; json_parse_manifest_incremental_chunk( inc_state, buffer, rc, bytes_left == 0); diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c index 25fca8851d..11a22faa18 100644 --- a/src/common/jsonapi.c +++ b/src/common/jsonapi.c @@ -1317,14 +1317,38 @@ json_lex(JsonLexContext *lex) if (c == '-' || (c >= '0' && c <= '9')) { /* for numbers look for possible numeric continuations */ - size_t nums = strspn(lex->input, "+-.eE0123456789"); - for (int i = 0; i < nums; i++) + bool numend = false; + + for (int i = 0; i < lex->input_length && !numend; i++) { char cc = lex->input[i]; - appendStringInfoCharMacro(ptok, cc); - added++; + switch (cc) + { + case '+': + case '-': + case '.': /* a fraction may continue the partial number */ + case 'e': + case 'E': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + appendStringInfoCharMacro(ptok, cc); + added++; + } + break; + default: + numend = true; + } } } /* add any remaining alpha_numeric chars */ @@ -1496,8 +1520,8 @@ json_lex(JsonLexContext *lex) if (lex->incremental && !lex->inc_state->is_last_chunk && p == lex->input + lex->input_length) { - appendStringInfoString( - &(lex->inc_state->partial_token), s); + appendBinaryStringInfo( + &(lex->inc_state->partial_token), s, end - s); return JSON_INCOMPLETE; } @@ -1554,8 +1578,8 @@ json_lex_string(JsonLexContext *lex) do { \ if (lex->incremental && !lex->inc_state->is_last_chunk) \ { \
- appendStringInfoString(&lex->inc_state->partial_token, \ - lex->token_start); \ + appendBinaryStringInfo(&lex->inc_state->partial_token, \ + lex->token_start, end - lex->token_start); \ return JSON_INCOMPLETE; \ } \ lex->token_terminator = s; \ @@ -1893,8 +1917,8 @@ json_lex_number(JsonLexContext *lex, char *s, if (lex->incremental && !lex->inc_state->is_last_chunk && len >= lex->input_length) { - appendStringInfoString(&lex->inc_state->partial_token, - lex->token_start); + appendBinaryStringInfo(&lex->inc_state->partial_token, + lex->token_start, s - lex->token_start); return JSON_INCOMPLETE; } else if (num_err != NULL) diff --git a/src/test/modules/test_json_parser/test_json_parser_incremental.c b/src/test/modules/test_json_parser/test_json_parser_incremental.c index edb51ef403..dee5c6f7d1 100644 --- a/src/test/modules/test_json_parser/test_json_parser_incremental.c +++ b/src/test/modules/test_json_parser/test_json_parser_incremental.c @@ -42,10 +42,11 @@ main(int argc, char **argv) while ((n_read = fread(buff, 1, 60, json_file)) > 0) { appendBinaryStringInfo(&json, buff, n_read); + appendStringInfoString(&json, "1+23 trailing junk"); if (!feof(json_file)) { result = pg_parse_json_incremental(&lex, &nullSemAction, - json.data, json.len, + json.data, n_read, false); if (result != JSON_INCOMPLETE) { @@ -59,7 +60,7 @@ main(int argc, char **argv) else { result = pg_parse_json_incremental(&lex, &nullSemAction, - json.data, json.len, + json.data, n_read, true); if (result != JSON_SUCCESS) { -- 2.34.1