Re: Add LZ4 compression in pg_dump - Mailing list pgsql-hackers
From | Justin Pryzby |
---|---|
Subject | Re: Add LZ4 compression in pg_dump |
Date | |
Msg-id | ZBOfIs5AIiM7EDV6@telsasoft.com Whole thread Raw |
In response to | Re: Add LZ4 compression in pg_dump (Tomas Vondra <tomas.vondra@enterprisedb.com>) |
Responses |
Re: Add LZ4 compression in pg_dump
|
List | pgsql-hackers |
On Thu, Mar 16, 2023 at 11:30:50PM +0100, Tomas Vondra wrote: > On 3/16/23 01:20, Justin Pryzby wrote: > > But try reading the diff while looking for the cause of a bug. It's the > > difference between reading 50, two-line changes, and reading a hunk that > > replaces 100 lines with a different 100 lines, with empty/unrelated > > lines randomly thrown in as context. > > I don't know, maybe I'm doing something wrong or maybe I just am bad at > looking at diffs, but if I apply the patch you submitted on 8/3 and do > the git-diff above (output attached), it seems pretty incomprehensible > to me :-( I don't see 50 two-line changes (I certainly wouldn't be able > to identify the root cause of the bug based on that). It's true that most of the diff is still incomprehensible... But look at the part relevant to the "empty-data" bug: [... incomprehensible changes elided ...] > static void > -InitCompressorZlib(CompressorState *cs, int level) > +DeflateCompressorInit(CompressorState *cs) > { > + GzipCompressorState *gzipcs; > z_streamp zp; > > - zp = cs->zp = (z_streamp) pg_malloc(sizeof(z_stream)); > + gzipcs = (GzipCompressorState *) pg_malloc0(sizeof(GzipCompressorState)); > + zp = gzipcs->zp = (z_streamp) pg_malloc(sizeof(z_stream)); > zp->zalloc = Z_NULL; > zp->zfree = Z_NULL; > zp->opaque = Z_NULL; > > /* > - * zlibOutSize is the buffer size we tell zlib it can output to. We > - * actually allocate one extra byte because some routines want to append a > - * trailing zero byte to the zlib output. > + * outsize is the buffer size we tell zlib it can output to. We actually > + * allocate one extra byte because some routines want to append a trailing > + * zero byte to the zlib output. > */ > - cs->zlibOut = (char *) pg_malloc(ZLIB_OUT_SIZE + 1); > - cs->zlibOutSize = ZLIB_OUT_SIZE; > + gzipcs->outbuf = pg_malloc(ZLIB_OUT_SIZE + 1); > + gzipcs->outsize = ZLIB_OUT_SIZE; > > - if (deflateInit(zp, level) != Z_OK) > - pg_fatal("could not initialize compression library: %s", > - zp->msg); > + /* -Z 0 uses the "None" compressor -- not zlib with no compression */ > + Assert(cs->compression_spec.level != 0); > + > + if (deflateInit(zp, cs->compression_spec.level) != Z_OK) > + pg_fatal("could not initialize compression library: %s", zp->msg); > > /* Just be paranoid - maybe End is called after Start, with no Write */ > - zp->next_out = (void *) cs->zlibOut; > - zp->avail_out = cs->zlibOutSize; > + zp->next_out = gzipcs->outbuf; > + zp->avail_out = gzipcs->outsize; > + > + /* Keep track of gzipcs */ > + cs->private_data = gzipcs; > } > > static void > -EndCompressorZlib(ArchiveHandle *AH, CompressorState *cs) > +DeflateCompressorEnd(ArchiveHandle *AH, CompressorState *cs) > { > - z_streamp zp = cs->zp; > + GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data; > + z_streamp zp; > > + zp = gzipcs->zp; > zp->next_in = NULL; > zp->avail_in = 0; > > /* Flush any remaining data from zlib buffer */ > - DeflateCompressorZlib(AH, cs, true); > + DeflateCompressorCommon(AH, cs, true); > > if (deflateEnd(zp) != Z_OK) > pg_fatal("could not close compression stream: %s", zp->msg); > > - free(cs->zlibOut); > - free(cs->zp); > + pg_free(gzipcs->outbuf); > + pg_free(gzipcs->zp); > + pg_free(gzipcs); > + cs->private_data = NULL; > } > > static void > -DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, bool flush) > +DeflateCompressorCommon(ArchiveHandle *AH, CompressorState *cs, bool flush) > { > - z_streamp zp = cs->zp; > - char *out = cs->zlibOut; > + GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data; > + z_streamp zp = gzipcs->zp; > + void *out = gzipcs->outbuf; > int res = Z_OK; > > - while (cs->zp->avail_in != 0 || flush) > + while (gzipcs->zp->avail_in != 0 || flush) > { > res = deflate(zp, flush ? Z_FINISH : Z_NO_FLUSH); > if (res == Z_STREAM_ERROR) > pg_fatal("could not compress data: %s", zp->msg); > - if ((flush && (zp->avail_out < cs->zlibOutSize)) > + if ((flush && (zp->avail_out < gzipcs->outsize)) > || (zp->avail_out == 0) > || (zp->avail_in != 0) > ) > @@ -289,18 +122,18 @@ DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, bool flush) > * chunk is the EOF marker in the custom format. This should never > * happen but ... > */ > - if (zp->avail_out < cs->zlibOutSize) > + if (zp->avail_out < gzipcs->outsize) > { > /* > * Any write function should do its own error checking but to > * make sure we do a check here as well ... > */ > - size_t len = cs->zlibOutSize - zp->avail_out; > + size_t len = gzipcs->outsize - zp->avail_out; > > - cs->writeF(AH, out, len); > + cs->writeF(AH, (char *) out, len); > } > - zp->next_out = (void *) out; > - zp->avail_out = cs->zlibOutSize; > + zp->next_out = out; > + zp->avail_out = gzipcs->outsize; > } > > if (res == Z_STREAM_END) > @@ -309,16 +142,26 @@ DeflateCompressorZlib(ArchiveHandle *AH, CompressorState *cs, bool flush) > } > > static void > -WriteDataToArchiveZlib(ArchiveHandle *AH, CompressorState *cs, > - const char *data, size_t dLen) > +EndCompressorGzip(ArchiveHandle *AH, CompressorState *cs) > { > - cs->zp->next_in = (void *) unconstify(char *, data); > - cs->zp->avail_in = dLen; > - DeflateCompressorZlib(AH, cs, false); > + /* If deflation was initialized, finalize it */ > + if (cs->private_data) > + DeflateCompressorEnd(AH, cs); > } > > static void > -ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF) > +WriteDataToArchiveGzip(ArchiveHandle *AH, CompressorState *cs, > + const void *data, size_t dLen) > +{ > + GzipCompressorState *gzipcs = (GzipCompressorState *) cs->private_data; > + > + gzipcs->zp->next_in = (void *) unconstify(void *, data); > + gzipcs->zp->avail_in = dLen; > + DeflateCompressorCommon(AH, cs, false); > +} > + > +static void > +ReadDataFromArchiveGzip(ArchiveHandle *AH, CompressorState *cs) > { > z_streamp zp; > char *out; > @@ -342,7 +185,7 @@ ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF) > zp->msg); > > /* no minimal chunk size for zlib */ > - while ((cnt = readF(AH, &buf, &buflen))) > + while ((cnt = cs->readF(AH, &buf, &buflen))) > { > zp->next_in = (void *) buf; > zp->avail_in = cnt; > @@ -382,389 +225,196 @@ ReadDataFromArchiveZlib(ArchiveHandle *AH, ReadFunc readF) > free(out); > free(zp); > } [... more incomprehensible changes elided ...]
pgsql-hackers by date: