From 95450f0e7e90f0a1a3cdfc12c760a9520bd2995f Mon Sep 17 00:00:00 2001 From: Georgios Kokolatos Date: Wed, 1 Mar 2023 12:42:32 +0000 Subject: [PATCH vX] Properly gzip compress when no data is available When creating dumps with the Compressor API, it is possible to only call the Allocate and End compressor functions without ever writing any data. The gzip implementation wrongly assumed that the write function would be called and defered the initialization of the internal compression system for the first write call. The End call would only finilize the internal compression system if that was ever initialized. The problem with that approach is that it violated the expectations of the internal compression system during decompression. This commit initializes the internal compression system during the Allocate call, under the condition that a write function was provided by the caller. Given that decompression does not need to keep track of any state, the compressor's private_data member is now populated only during compression. Tests are added to cover this scenario. Initial patch by Justin Pruzby. Reported-by: Justin Pryzby --- src/bin/pg_dump/compress_gzip.c | 118 +++++++++++++++++-------------- src/bin/pg_dump/t/002_pg_dump.pl | 27 ++++++- 2 files changed, 91 insertions(+), 54 deletions(-) diff --git a/src/bin/pg_dump/compress_gzip.c b/src/bin/pg_dump/compress_gzip.c index 0af65afeb4..f5d32cf059 100644 --- a/src/bin/pg_dump/compress_gzip.c +++ b/src/bin/pg_dump/compress_gzip.c @@ -32,9 +32,48 @@ typedef struct GzipCompressorState size_t outsize; } GzipCompressorState; + /* Private routines that support gzip compressed data I/O */ static void -DeflateCompressorGzip(ArchiveHandle *AH, CompressorState *cs, bool flush) +DeflateCompressorInit(CompressorState *cs) +{ + GzipCompressorState *gzipcs; + z_streamp zp; + + gzipcs = (GzipCompressorState *) pg_malloc0(sizeof(GzipCompressorState)); + zp = gzipcs->zp = (z_streamp) pg_malloc(sizeof(z_stream)); + zp->zalloc = Z_NULL; + zp->zfree = Z_NULL; + zp->opaque = Z_NULL; + + /* + * outsize is the buffer size we tell zlib it can output to. We + * actually allocate one extra byte because some routines want to + * append a trailing zero byte to the zlib output. + */ + gzipcs->outbuf = pg_malloc(ZLIB_OUT_SIZE + 1); + gzipcs->outsize = ZLIB_OUT_SIZE; + + /* + * A level of zero simply copies the input one block at the time. This + * is probably not what the user wanted when calling this interface. + */ + if (cs->compression_spec.level == 0) + pg_fatal("requested to compress the archive yet no level was specified"); + + if (deflateInit(zp, cs->compression_spec.level) != Z_OK) + pg_fatal("could not initialize compression library: %s", zp->msg); + + /* Just be paranoid - maybe End is called after Start, with no Write */ + zp->next_out = gzipcs->outbuf; + zp->avail_out = gzipcs->outsize; + + /* Keep track of gzipcs */ + cs->private_data = gzipcs; +} + +static void +DeflateCompressorData(ArchiveHandle *AH, CompressorState *cs, bool flush) { GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data; z_streamp zp = gzipcs->zp; @@ -76,71 +115,44 @@ DeflateCompressorGzip(ArchiveHandle *AH, CompressorState *cs, bool flush) } static void -EndCompressorGzip(ArchiveHandle *AH, CompressorState *cs) +DeflateCompressorEnd(ArchiveHandle *AH, CompressorState *cs) { GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data; z_streamp zp; - if (gzipcs->zp) - { - zp = gzipcs->zp; - zp->next_in = NULL; - zp->avail_in = 0; - - /* Flush any remaining data from zlib buffer */ - DeflateCompressorGzip(AH, cs, true); + zp = gzipcs->zp; + zp->next_in = NULL; + zp->avail_in = 0; - if (deflateEnd(zp) != Z_OK) - pg_fatal("could not close compression stream: %s", zp->msg); + /* Flush any remaining data from zlib buffer */ + DeflateCompressorData(AH, cs, true); - pg_free(gzipcs->outbuf); - pg_free(gzipcs->zp); - } + if (deflateEnd(zp) != Z_OK) + pg_fatal("could not close compression stream: %s", zp->msg); + pg_free(gzipcs->outbuf); + pg_free(gzipcs->zp); pg_free(gzipcs); cs->private_data = NULL; } +static void +EndCompressorGzip(ArchiveHandle *AH, CompressorState *cs) +{ + /* If deflation was initialized, finalize it */ + if (cs->private_data) + DeflateCompressorEnd(AH, cs); +} + static void WriteDataToArchiveGzip(ArchiveHandle *AH, CompressorState *cs, const void *data, size_t dLen) { GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data; - z_streamp zp; - - if (!gzipcs->zp) - { - zp = gzipcs->zp = (z_streamp) pg_malloc(sizeof(z_stream)); - zp->zalloc = Z_NULL; - zp->zfree = Z_NULL; - zp->opaque = Z_NULL; - - /* - * outsize is the buffer size we tell zlib it can output to. We - * actually allocate one extra byte because some routines want to - * append a trailing zero byte to the zlib output. - */ - gzipcs->outbuf = pg_malloc(ZLIB_OUT_SIZE + 1); - gzipcs->outsize = ZLIB_OUT_SIZE; - - /* - * A level of zero simply copies the input one block at the time. This - * is probably not what the user wanted when calling this interface. - */ - if (cs->compression_spec.level == 0) - pg_fatal("requested to compress the archive yet no level was specified"); - - if (deflateInit(zp, cs->compression_spec.level) != Z_OK) - pg_fatal("could not initialize compression library: %s", zp->msg); - - /* Just be paranoid - maybe End is called after Start, with no Write */ - zp->next_out = gzipcs->outbuf; - zp->avail_out = gzipcs->outsize; - } gzipcs->zp->next_in = (void *) unconstify(void *, data); gzipcs->zp->avail_in = dLen; - DeflateCompressorGzip(AH, cs, false); + DeflateCompressorData(AH, cs, false); } static void @@ -214,17 +226,19 @@ void InitCompressorGzip(CompressorState *cs, const pg_compress_specification compression_spec) { - GzipCompressorState *gzipcs; - cs->readData = ReadDataFromArchiveGzip; cs->writeData = WriteDataToArchiveGzip; cs->end = EndCompressorGzip; cs->compression_spec = compression_spec; - gzipcs = (GzipCompressorState *) pg_malloc0(sizeof(GzipCompressorState)); - - cs->private_data = gzipcs; + /* + * If the caller has defined a write function, prepare the necessary state. + * Avoid initializing during the first write call, because End may be called + * without ever writing any data. + */ + if (cs->writeF) + DeflateCompressorInit(cs); } diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index 72b19ee6cd..7b5a6e190c 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -1194,6 +1194,29 @@ my %tests = ( }, }, + 'LO create (with no data)' => { + create_sql => + 'SELECT pg_catalog.lo_create(0);', + regexp => qr/^ + \QSELECT pg_catalog.lo_open\E \('\d+',\ \d+\);\n + \QSELECT pg_catalog.lo_close(0);\E + /xm, + like => { + %full_runs, + column_inserts => 1, + data_only => 1, + inserts => 1, + section_data => 1, + test_schema_plus_large_objects => 1, + }, + unlike => { + binary_upgrade => 1, + no_large_objects => 1, + schema_only => 1, + section_pre_data => 1, + }, + }, + 'COMMENT ON DATABASE postgres' => { regexp => qr/^COMMENT ON DATABASE postgres IS .+;/m, @@ -4250,8 +4273,8 @@ foreach my $run (sort keys %pgdump_runs) # Skip command-level tests for gzip if there is no support for it. if ($pgdump_runs{$run}->{compile_option} && - ($pgdump_runs{$run}->{compile_option} eq 'gzip' && !$supports_gzip) || - ($pgdump_runs{$run}->{compile_option} eq 'lz4' && !$supports_lz4)) + (($pgdump_runs{$run}->{compile_option} eq 'gzip' && !$supports_gzip) || + ($pgdump_runs{$run}->{compile_option} eq 'lz4' && !$supports_lz4))) { note "$run: skipped due to no $pgdump_runs{$run}->{compile_option} support"; next; -- 2.34.1