From 3c7e5b8ebe3c4d063f8d281a4c9d854fef6578b0 Mon Sep 17 00:00:00 2001 From: Georgios Kokolatos Date: Mon, 16 Jan 2023 15:01:05 +0000 Subject: [PATCH v20 3/3] THIS IS WIP PATCH PRESENTED HERE FOR COMPLETENESS: LZ4 compression Please be aware that comments, references, naming, etc, are not scrutinized. Those will be addressed once 0001 of the series is getting closer to a committable state. However the patch should apply cleanly and the tests should be passing. Within compress_lz4.{c,h} the streaming API and a file API compression is implemented. The first one is aimed at inlined use cases and thus simple lz4.h calls can be used directly. The second one is generating output, or is parsing input, which can be read/generated via the lz4 utility. Wherever the LZ4F api does not implement all the functionality corresponding to fread(), fwrite(), fgets(), fgetc(), feof(), and fclose(), it has been implemented locally. --- doc/src/sgml/ref/pg_dump.sgml | 13 +- src/bin/pg_dump/Makefile | 2 + src/bin/pg_dump/compress_io.c | 11 +- src/bin/pg_dump/compress_lz4.c | 618 +++++++++++++++++++++++++++ src/bin/pg_dump/compress_lz4.h | 22 + src/bin/pg_dump/meson.build | 8 +- src/bin/pg_dump/pg_backup_archiver.c | 14 +- src/bin/pg_dump/pg_dump.c | 5 +- src/bin/pg_dump/t/002_pg_dump.pl | 82 +++- src/tools/pginclude/cpluspluscheck | 1 + src/tools/pgindent/typedefs.list | 1 + 11 files changed, 756 insertions(+), 21 deletions(-) create mode 100644 src/bin/pg_dump/compress_lz4.c create mode 100644 src/bin/pg_dump/compress_lz4.h diff --git a/doc/src/sgml/ref/pg_dump.sgml b/doc/src/sgml/ref/pg_dump.sgml index 2c938cd7e1..49d218905f 100644 --- a/doc/src/sgml/ref/pg_dump.sgml +++ b/doc/src/sgml/ref/pg_dump.sgml @@ -330,9 +330,10 @@ PostgreSQL documentation machine-readable format that pg_restore can read. A directory format archive can be manipulated with standard Unix tools; for example, files in an uncompressed archive - can be compressed with the gzip tool.
- This format is compressed by default and also supports parallel - dumps. + can be compressed with the gzip or + lz4 tool. + This format is compressed by default using gzip + and also supports parallel dumps. @@ -654,7 +655,7 @@ PostgreSQL documentation Specify the compression method and/or the compression level to use. The compression method can be set to gzip or - none for no compression. + lz4 or none for no compression. A compression detail string can optionally be specified. If the detail string is an integer, it specifies the compression level. Otherwise, it should be a comma-separated list of items, each of the @@ -675,8 +676,8 @@ PostgreSQL documentation individual table-data segments, and the default is to compress using gzip at a moderate level. For plain text output, setting a nonzero compression level causes the entire output file to be compressed, - as though it had been fed through gzip; but the default - is not to compress. + as though it had been fed through gzip or + lz4; but the default is not to compress. The tar archive format currently does not support compression at all. diff --git a/src/bin/pg_dump/Makefile b/src/bin/pg_dump/Makefile index 7a19f5d617..a1401377ab 100644 --- a/src/bin/pg_dump/Makefile +++ b/src/bin/pg_dump/Makefile @@ -17,6 +17,7 @@ top_builddir = ../../..
include $(top_builddir)/src/Makefile.global export GZIP_PROGRAM=$(GZIP) +export LZ4 export with_icu override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) @@ -25,6 +26,7 @@ LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) OBJS = \ $(WIN32RES) \ compress_gzip.o \ + compress_lz4.o \ compress_io.o \ dumputils.o \ parallel.o \ diff --git a/src/bin/pg_dump/compress_io.c b/src/bin/pg_dump/compress_io.c index 7274039e87..43bd4c1a2c 100644 --- a/src/bin/pg_dump/compress_io.c +++ b/src/bin/pg_dump/compress_io.c @@ -60,6 +60,7 @@ #include "compress_io.h" #include "compress_gzip.h" +#include "compress_lz4.h" #include "pg_backup_utils.h" #ifdef HAVE_LIBZ @@ -136,7 +137,7 @@ AllocateCompressor(const pg_compress_specification compression_spec, InitCompressorGzip(cs, compression_spec); break; case PG_COMPRESSION_LZ4: - pg_fatal("compression with %s is not yet supported", "LZ4"); + InitCompressorLZ4(cs, compression_spec); break; case PG_COMPRESSION_ZSTD: pg_fatal("compression with %s is not yet supported", "ZSTD"); @@ -326,7 +327,7 @@ InitCompressFileHandle(const pg_compress_specification compression_spec) InitCompressGzip(CFH, compression_spec); break; case PG_COMPRESSION_LZ4: - pg_fatal("compression with %s is not yet supported", "LZ4"); + InitCompressLZ4(CFH, compression_spec); break; case PG_COMPRESSION_ZSTD: pg_fatal("compression with %s is not yet supported", "ZSTD"); @@ -340,12 +341,12 @@ InitCompressFileHandle(const pg_compress_specification compression_spec) * Open a file for reading. 'path' is the file to open, and 'mode' should * be either "r" or "rb". * - * If the file at 'path' does not exist, we append the "{.gz,.lz4}" suffix (i + * If the file at 'path' does not exist, we append the ".gz" suffix (if * 'path' doesn't already have it) and try again. So if you pass "foo" as - * 'path', this will open either "foo" or "foo.gz" or "foo.lz4", trying in that - * order. + * 'path', this will open either "foo" or "foo.gz", trying in that order. 
* * On failure, return NULL with an error code in errno. + * */ CompressFileHandle * InitDiscoverCompressFileHandle(const char *path, const char *mode) diff --git a/src/bin/pg_dump/compress_lz4.c b/src/bin/pg_dump/compress_lz4.c new file mode 100644 index 0000000000..c97e16187a --- /dev/null +++ b/src/bin/pg_dump/compress_lz4.c @@ -0,0 +1,618 @@ +/*------------------------------------------------------------------------- + * + * compress_lz4.c + * Routines for archivers to write an uncompressed or compressed data + * stream. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/bin/pg_dump/compress_lz4.c + * + *------------------------------------------------------------------------- + */ +#include "postgres_fe.h" +#include "pg_backup_utils.h" + +#include "compress_lz4.h" + +#ifdef USE_LZ4 +#include +#include + +#define LZ4_OUT_SIZE (4 * 1024) +#define LZ4_IN_SIZE (16 * 1024) + +/* + * LZ4F_HEADER_SIZE_MAX first appeared in v1.7.5 of the library. + * Redefine it for installations with a lesser version. 
+ */ +#ifndef LZ4F_HEADER_SIZE_MAX +#define LZ4F_HEADER_SIZE_MAX 32 +#endif + +/*---------------------- + * Compressor API + *---------------------- + */ + +typedef struct LZ4CompressorState +{ + char *outbuf; + size_t outsize; +} LZ4CompressorState; + +/* Private routines that support LZ4 compressed data I/O */ +static void ReadDataFromArchiveLZ4(ArchiveHandle *AH, CompressorState *cs); +static void WriteDataToArchiveLZ4(ArchiveHandle *AH, CompressorState *cs, + const void *data, size_t dLen); +static void EndCompressorLZ4(ArchiveHandle *AH, CompressorState *cs); + +static void +ReadDataFromArchiveLZ4(ArchiveHandle *AH, CompressorState *cs) +{ + LZ4_streamDecode_t lz4StreamDecode; + char *buf; + char *decbuf; + size_t buflen; + size_t cnt; + + buflen = LZ4_IN_SIZE; + buf = pg_malloc(buflen); + decbuf = pg_malloc(buflen); + + LZ4_setStreamDecode(&lz4StreamDecode, NULL, 0); + + while ((cnt = cs->readF(AH, &buf, &buflen))) + { + int decBytes = LZ4_decompress_safe_continue(&lz4StreamDecode, + buf, decbuf, + cnt, buflen); + + ahwrite(decbuf, 1, decBytes, AH); + } + + pg_free(buf); + pg_free(decbuf); +} + +static void +WriteDataToArchiveLZ4(ArchiveHandle *AH, CompressorState *cs, + const void *data, size_t dLen) +{ + LZ4CompressorState *LZ4cs = (LZ4CompressorState *) cs->private_data; + size_t compressed; + size_t requiredsize = LZ4_compressBound(dLen); + + if (requiredsize > LZ4cs->outsize) + { + LZ4cs->outbuf = pg_realloc(LZ4cs->outbuf, requiredsize); + LZ4cs->outsize = requiredsize; + } + + compressed = LZ4_compress_default(data, LZ4cs->outbuf, + dLen, LZ4cs->outsize); + + if (compressed <= 0) + pg_fatal("failed to LZ4 compress data"); + + cs->writeF(AH, LZ4cs->outbuf, compressed); +} + +static void +EndCompressorLZ4(ArchiveHandle *AH, CompressorState *cs) +{ + LZ4CompressorState *LZ4cs; + + LZ4cs = (LZ4CompressorState *) cs->private_data; + if (LZ4cs) + { + pg_free(LZ4cs->outbuf); + pg_free(LZ4cs); + cs->private_data = NULL; + } +} + + +/* Public routines that 
support LZ4 compressed data I/O */ +void +InitCompressorLZ4(CompressorState *cs, const pg_compress_specification compression_spec) +{ + cs->readData = ReadDataFromArchiveLZ4; + cs->writeData = WriteDataToArchiveLZ4; + cs->end = EndCompressorLZ4; + + cs->compression_spec = compression_spec; + + /* Will be lazy init'd */ + cs->private_data = pg_malloc0(sizeof(LZ4CompressorState)); +} + +/*---------------------- + * Compress File API + *---------------------- + */ + +/* + * State needed for LZ4 (de)compression using the CompressFileHandle API. + */ +typedef struct LZ4File +{ + FILE *fp; + + LZ4F_preferences_t prefs; + + LZ4F_compressionContext_t ctx; + LZ4F_decompressionContext_t dtx; + + bool inited; + bool compressing; + + size_t buflen; + char *buffer; + + size_t overflowalloclen; + size_t overflowlen; + char *overflowbuf; + + size_t errcode; +} LZ4File; + +/* + * LZ4 equivalent to feof() or gzeof(). The end of file + * is reached if there is no decompressed output in the + * overflow buffer and the end of the file is reached. + */ +static int +LZ4File_eof(CompressFileHandle *CFH) +{ + LZ4File *fs = (LZ4File *) CFH->private_data; + + return fs->overflowlen == 0 && feof(fs->fp); +} + +static const char * +LZ4File_get_error(CompressFileHandle *CFH) +{ + LZ4File *fs = (LZ4File *) CFH->private_data; + const char *errmsg; + + if (LZ4F_isError(fs->errcode)) + errmsg = LZ4F_getErrorName(fs->errcode); + else + errmsg = strerror(errno); + + return errmsg; +} + +/* + * Prepare an already alloc'ed LZ4File struct for subsequent calls. + * + * It creates the necessary contexts for the operations. When compressing, + * it additionally writes the LZ4 header in the output stream.
*/ +static int +LZ4File_init(LZ4File * fs, int size, bool compressing) +{ + size_t status; + + if (fs->inited) + return 0; + + fs->compressing = compressing; + fs->inited = true; + + if (fs->compressing) + { + fs->buflen = LZ4F_compressBound(LZ4_IN_SIZE, &fs->prefs); + if (fs->buflen < LZ4F_HEADER_SIZE_MAX) + fs->buflen = LZ4F_HEADER_SIZE_MAX; + + status = LZ4F_createCompressionContext(&fs->ctx, LZ4F_VERSION); + if (LZ4F_isError(status)) + { + fs->errcode = status; + return 1; + } + + fs->buffer = pg_malloc(fs->buflen); + status = LZ4F_compressBegin(fs->ctx, fs->buffer, fs->buflen, + &fs->prefs); + + if (LZ4F_isError(status)) + { + fs->errcode = status; + return 1; + } + + if (fwrite(fs->buffer, 1, status, fs->fp) != status) + { + errno = errno ? : ENOSPC; + return 1; + } + } + else + { + status = LZ4F_createDecompressionContext(&fs->dtx, LZ4F_VERSION); + if (LZ4F_isError(status)) + { + fs->errcode = status; + return 1; + } + + fs->buflen = size > LZ4_OUT_SIZE ? size : LZ4_OUT_SIZE; + fs->buffer = pg_malloc(fs->buflen); + + fs->overflowalloclen = fs->buflen; + fs->overflowbuf = pg_malloc(fs->overflowalloclen); + fs->overflowlen = 0; + } + + return 0; +} + +/* + * Read already decompressed content from the overflow buffer into 'ptr' up to + * 'size' bytes, if available. If the eol_flag is set, then stop at the first + * occurrence of the new line char prior to 'size' bytes. + * + * Any unread content in the overflow buffer is moved to the beginning.
+ */ +static int +LZ4File_read_overflow(LZ4File * fs, void *ptr, int size, bool eol_flag) +{ + char *p; + int readlen = 0; + + if (fs->overflowlen == 0) + return 0; + + if (fs->overflowlen >= size) + readlen = size; + else + readlen = fs->overflowlen; + + if (eol_flag && (p = memchr(fs->overflowbuf, '\n', readlen))) + /* Include the line terminating char */ + readlen = p - fs->overflowbuf + 1; + + memcpy(ptr, fs->overflowbuf, readlen); + fs->overflowlen -= readlen; + + if (fs->overflowlen > 0) + memmove(fs->overflowbuf, fs->overflowbuf + readlen, fs->overflowlen); + + return readlen; +} + +/* + * The workhorse for reading decompressed content out of an LZ4 compressed + * stream. + * + * It will read up to 'ptrsize' decompressed content, or up to the new line char + * if found first when the eol_flag is set. It is possible that the decompressed + * output generated by reading any compressed input via the LZ4F API, exceeds + * 'ptrsize'. Any exceeding decompressed content is stored at an overflow + * buffer within LZ4File. Of course, when the function is called, it will first + * try to consume any decompressed content already present in the overflow + * buffer, before decompressing new content. 
*/ +static int +LZ4File_read_internal(LZ4File * fs, void *ptr, int ptrsize, bool eol_flag) +{ + size_t dsize = 0; + size_t rsize; + size_t size = ptrsize; + bool eol_found = false; + + void *readbuf; + + /* Lazy init */ + if (!fs->inited && LZ4File_init(fs, size, false /* decompressing */ )) + return -1; + + /* Verify that there is enough space in the outbuf */ + if (size > fs->buflen) + { + fs->buflen = size; + fs->buffer = pg_realloc(fs->buffer, size); + } + + /* use already decompressed content if available */ + dsize = LZ4File_read_overflow(fs, ptr, size, eol_flag); + if (dsize == size || (eol_flag && memchr(ptr, '\n', dsize))) + return dsize; + + readbuf = pg_malloc(size); + + do + { + char *rp; + char *rend; + + rsize = fread(readbuf, 1, size, fs->fp); + if (rsize < size && !feof(fs->fp)) + return -1; + + rp = (char *) readbuf; + rend = (char *) readbuf + rsize; + + while (rp < rend) + { + size_t status; + size_t outlen = fs->buflen; + size_t read_remain = rend - rp; + + memset(fs->buffer, 0, outlen); + status = LZ4F_decompress(fs->dtx, fs->buffer, &outlen, + rp, &read_remain, NULL); + if (LZ4F_isError(status)) + { + fs->errcode = status; + return -1; + } + + rp += read_remain; + + /* + * fill in what space is available in ptr if the eol flag is set, + * either skip if one already found or fill up to EOL if present + * in the outbuf + */ + if (outlen > 0 && dsize < size && eol_found == false) + { + char *p; + size_t lib = (eol_flag == 0) ? size - dsize : size - 1 - dsize; + size_t len = outlen < lib ?
outlen : lib; + + if (eol_flag == true && + (p = memchr(fs->buffer, '\n', outlen)) && + (size_t) (p - fs->buffer + 1) <= len) + { + len = p - fs->buffer + 1; + eol_found = true; + } + + memcpy((char *) ptr + dsize, fs->buffer, len); + dsize += len; + + /* move what did not fit, if any, at the beginning of the buf */ + if (len < outlen) + memmove(fs->buffer, fs->buffer + len, outlen - len); + outlen -= len; + } + + /* if there is available output, save it */ + if (outlen > 0) + { + while (fs->overflowlen + outlen > fs->overflowalloclen) + { + fs->overflowalloclen *= 2; + fs->overflowbuf = pg_realloc(fs->overflowbuf, + fs->overflowalloclen); + } + + memcpy(fs->overflowbuf + fs->overflowlen, fs->buffer, outlen); + fs->overflowlen += outlen; + } + } + } while (rsize == size && dsize < size && eol_found == 0); + + pg_free(readbuf); + + return (int) dsize; +} + +/* + * Compress size bytes from ptr and write them to the stream. + */ +static size_t +LZ4File_write(const void *ptr, size_t size, CompressFileHandle *CFH) +{ + LZ4File *fs = (LZ4File *) CFH->private_data; + size_t status; + int remaining = size; + + if (!fs->inited && LZ4File_init(fs, size, true)) + return -1; + + while (remaining > 0) + { + int chunk = remaining < LZ4_IN_SIZE ? remaining : LZ4_IN_SIZE; + + remaining -= chunk; + + status = LZ4F_compressUpdate(fs->ctx, fs->buffer, fs->buflen, + ptr, chunk, NULL); + if (LZ4F_isError(status)) + { + fs->errcode = status; + return -1; + } + + if (fwrite(fs->buffer, 1, status, fs->fp) != status) + { + errno = errno ? : ENOSPC; + return 1; + } + } + + return size; +} + +/* + * fread() equivalent implementation for LZ4 compressed files.
+ */ +static size_t +LZ4File_read(void *ptr, size_t size, CompressFileHandle *CFH) +{ + LZ4File *fs = (LZ4File *) CFH->private_data; + int ret; + + ret = LZ4File_read_internal(fs, ptr, size, false); + if (ret != size && !LZ4File_eof(CFH)) + pg_fatal("could not read from input file: %s", LZ4File_get_error(CFH)); + + return ret; +} + +/* + * fgetc() equivalent implementation for LZ4 compressed files. + */ +static int +LZ4File_getc(CompressFileHandle *CFH) +{ + LZ4File *fs = (LZ4File *) CFH->private_data; + unsigned char c; + + if (LZ4File_read_internal(fs, &c, 1, false) != 1) + { + if (!LZ4File_eof(CFH)) + pg_fatal("could not read from input file: %s", LZ4File_get_error(CFH)); + else + pg_fatal("could not read from input file: end of file"); + } + + return c; +} + +/* + * fgets() equivalent implementation for LZ4 compressed files. + */ +static char * +LZ4File_gets(char *ptr, int size, CompressFileHandle *CFH) +{ + LZ4File *fs = (LZ4File *) CFH->private_data; + size_t dsize; + + dsize = LZ4File_read_internal(fs, ptr, size, true); + if (dsize < 0) + pg_fatal("could not read from input file: %s", LZ4File_get_error(CFH)); + + /* Done reading */ + if (dsize == 0) + return NULL; + + return ptr; +} + +/* + * Finalize (de)compression of a stream. When compressing it will write any + * remaining content and/or generated footer from the LZ4 API. + */ +static int +LZ4File_close(CompressFileHandle *CFH) +{ + FILE *fp; + LZ4File *fs = (LZ4File *) CFH->private_data; + size_t status; + int ret; + + fp = fs->fp; + if (fs->inited) + { + if (fs->compressing) + { + status = LZ4F_compressEnd(fs->ctx, fs->buffer, fs->buflen, NULL); + if (LZ4F_isError(status)) + pg_fatal("failed to end compression: %s", + LZ4F_getErrorName(status)); + else if ((ret = fwrite(fs->buffer, 1, status, fs->fp)) != status) + { + errno = errno ? 
: ENOSPC; + WRITE_ERROR_EXIT; + } + + status = LZ4F_freeCompressionContext(fs->ctx); + if (LZ4F_isError(status)) + pg_fatal("failed to end compression: %s", + LZ4F_getErrorName(status)); + } + else + { + status = LZ4F_freeDecompressionContext(fs->dtx); + if (LZ4F_isError(status)) + pg_fatal("failed to end decompression: %s", + LZ4F_getErrorName(status)); + pg_free(fs->overflowbuf); + } + + pg_free(fs->buffer); + } + + pg_free(fs); + + return fclose(fp); +} + +static int +LZ4File_open(const char *path, int fd, const char *mode, + CompressFileHandle *CFH) +{ + FILE *fp; + LZ4File *lz4fp = (LZ4File *) CFH->private_data; + + if (fd >= 0) + fp = fdopen(fd, mode); + else + fp = fopen(path, mode); + if (fp == NULL) + { + lz4fp->errcode = errno; + return 1; + } + + lz4fp->fp = fp; + + return 0; +} + +static int +LZ4File_open_write(const char *path, const char *mode, CompressFileHandle *CFH) +{ + char *fname; + int ret; + + fname = psprintf("%s.lz4", path); + ret = CFH->open_func(fname, -1, mode, CFH); + pg_free(fname); + + return ret; +} + +void +InitCompressLZ4(CompressFileHandle *CFH, const pg_compress_specification compression_spec) +{ + LZ4File *lz4fp; + + CFH->open_func = LZ4File_open; + CFH->open_write_func = LZ4File_open_write; + CFH->read_func = LZ4File_read; + CFH->write_func = LZ4File_write; + CFH->gets_func = LZ4File_gets; + CFH->getc_func = LZ4File_getc; + CFH->eof_func = LZ4File_eof; + CFH->close_func = LZ4File_close; + CFH->get_error_func = LZ4File_get_error; + + CFH->compression_spec = compression_spec; + lz4fp = pg_malloc0(sizeof(*lz4fp)); + if (CFH->compression_spec.level >= 0) + lz4fp->prefs.compressionLevel = CFH->compression_spec.level; + + CFH->private_data = lz4fp; +} +#else /* USE_LZ4 */ +void +InitCompressorLZ4(CompressorState *cs, const pg_compress_specification compression_spec) +{ + pg_fatal("this build does not support compression with %s", "LZ4"); +} + +void +InitCompressLZ4(CompressFileHandle *CFH, const pg_compress_specification 
compression_spec) +{ + pg_fatal("this build does not support compression with %s", "LZ4"); +} +#endif /* USE_LZ4 */ diff --git a/src/bin/pg_dump/compress_lz4.h b/src/bin/pg_dump/compress_lz4.h new file mode 100644 index 0000000000..74595db1b9 --- /dev/null +++ b/src/bin/pg_dump/compress_lz4.h @@ -0,0 +1,22 @@ +/*------------------------------------------------------------------------- + * + * compress_lz4.h + * Interface to compress_io.c routines + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/bin/pg_dump/compress_lz4.h + * + *------------------------------------------------------------------------- + */ +#ifndef _COMPRESS_LZ4_H_ +#define _COMPRESS_LZ4_H_ + +#include "compress_io.h" + +extern void InitCompressorLZ4(CompressorState *cs, const pg_compress_specification compression_spec); +extern void InitCompressLZ4(CompressFileHandle *CFH, const pg_compress_specification compression_spec); + +#endif /* _COMPRESS_LZ4_H_ */ diff --git a/src/bin/pg_dump/meson.build b/src/bin/pg_dump/meson.build index aa2c91829c..473d40d456 100644 --- a/src/bin/pg_dump/meson.build +++ b/src/bin/pg_dump/meson.build @@ -3,6 +3,7 @@ pg_dump_common_sources = files( 'compress_io.c', 'compress_gzip.c', + 'compress_lz4.c', 'dumputils.c', 'parallel.c', 'pg_backup_archiver.c', @@ -17,7 +18,7 @@ pg_dump_common_sources = files( pg_dump_common = static_library('libpgdump_common', pg_dump_common_sources, c_pch: pch_postgres_fe_h, - dependencies: [frontend_code, libpq, zlib], + dependencies: [frontend_code, libpq, lz4, zlib], kwargs: internal_lib_args, ) @@ -85,7 +86,10 @@ tests += { 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), 'tap': { - 'env': {'GZIP_PROGRAM': gzip.path()}, + 'env': { + 'GZIP_PROGRAM': gzip.path(), + 'LZ4': program_lz4.found() ? 
program_lz4.path() : '', + }, 'tests': [ 't/001_basic.pl', 't/002_pg_dump.pl', diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 1f207c6f4d..119b7f2553 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -395,6 +395,10 @@ RestoreArchive(Archive *AHX) #ifndef HAVE_LIBZ if (AH->compression_spec.algorithm == PG_COMPRESSION_GZIP) supports_compression = false; +#endif +#ifndef USE_LZ4 + if (AH->compression_spec.algorithm == PG_COMPRESSION_LZ4) + supports_compression = false; #endif if (supports_compression == false) pg_fatal("cannot restore from compressed archive (compression not supported in this installation)"); @@ -2074,7 +2078,7 @@ _discoverArchiveFormat(ArchiveHandle *AH) /* * Check if the specified archive is a directory. If so, check if - * there's a "toc.dat" (or "toc.dat.gz") file in it. + * there's a "toc.dat" (or "toc.dat.{gz,lz4}") file in it. */ if (stat(AH->fSpec, &st) == 0 && S_ISDIR(st.st_mode)) { @@ -2084,6 +2088,10 @@ _discoverArchiveFormat(ArchiveHandle *AH) #ifdef HAVE_LIBZ if (_fileExistsInDirectory(AH->fSpec, "toc.dat.gz")) return AH->format; +#endif +#ifdef USE_LZ4 + if (_fileExistsInDirectory(AH->fSpec, "toc.dat.lz4")) + return AH->format; #endif pg_fatal("directory \"%s\" does not appear to be a valid archive (\"toc.dat\" does not exist)", AH->fSpec); @@ -3747,6 +3755,10 @@ ReadHead(ArchiveHandle *AH) #ifndef HAVE_LIBZ if (AH->compression_spec.algorithm == PG_COMPRESSION_GZIP) unsupported = true; +#endif +#ifndef USE_LZ4 + if (AH->compression_spec.algorithm == PG_COMPRESSION_LZ4) + unsupported = true; #endif if (unsupported) pg_fatal("archive is compressed, but this installation does not support compression"); diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 2c0a969972..f392760e06 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -715,13 +715,12 @@ main(int argc, char **argv) case PG_COMPRESSION_NONE: /* 
fallthrough */ case PG_COMPRESSION_GZIP: + /* fallthrough */ + case PG_COMPRESSION_LZ4: break; case PG_COMPRESSION_ZSTD: pg_fatal("compression with %s is not yet supported", "ZSTD"); break; - case PG_COMPRESSION_LZ4: - pg_fatal("compression with %s is not yet supported", "LZ4"); - break; } /* diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index f3ba926321..f497ec6040 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -139,6 +139,80 @@ my %pgdump_runs = ( args => [ '-d', "$tempdir/compression_gzip_plain.sql.gz", ], }, }, + + # Do not use --no-sync to give test coverage for data sync. + compression_lz4_custom => { + test_key => 'compression', + compile_option => 'lz4', + dump_cmd => [ + 'pg_dump', '--format=custom', + '--compress=lz4', "--file=$tempdir/compression_lz4_custom.dump", + 'postgres', + ], + restore_cmd => [ + 'pg_restore', + "--file=$tempdir/compression_lz4_custom.sql", + "$tempdir/compression_lz4_custom.dump", + ], + command_like => { + command => [ + 'pg_restore', + '-l', "$tempdir/compression_lz4_custom.dump", + ], + expected => qr/Compression: lz4/, + name => 'data content is lz4 compressed' + }, + }, + + # Do not use --no-sync to give test coverage for data sync. + compression_lz4_dir => { + test_key => 'compression', + compile_option => 'lz4', + dump_cmd => [ + 'pg_dump', '--jobs=2', + '--format=directory', '--compress=lz4:1', + "--file=$tempdir/compression_lz4_dir", 'postgres', + ], + # Give coverage for manually compressed blob.toc files during + # restore. 
+ compress_cmd => { + program => $ENV{'LZ4'}, + args => [ + '-z', '-f', '--rm', + "$tempdir/compression_lz4_dir/blobs.toc", + "$tempdir/compression_lz4_dir/blobs.toc.lz4", + ], + }, + # Verify that data files were compressed + glob_patterns => [ + "$tempdir/compression_lz4_dir/toc.dat", + "$tempdir/compression_lz4_dir/*.dat.lz4", + ], + restore_cmd => [ + 'pg_restore', '--jobs=2', + "--file=$tempdir/compression_lz4_dir.sql", + "$tempdir/compression_lz4_dir", + ], + }, + + compression_lz4_plain => { + test_key => 'compression', + compile_option => 'lz4', + dump_cmd => [ + 'pg_dump', '--format=plain', '--compress=lz4', + "--file=$tempdir/compression_lz4_plain.sql.lz4", 'postgres', + ], + # Decompress the generated file to run through the tests. + compress_cmd => { + program => $ENV{'LZ4'}, + args => [ + '-d', '-f', + "$tempdir/compression_lz4_plain.sql.lz4", + "$tempdir/compression_lz4_plain.sql", + ], + }, + }, + clean => { dump_cmd => [ 'pg_dump', @@ -4175,11 +4249,11 @@ foreach my $run (sort keys %pgdump_runs) my $run_db = 'postgres'; # Skip command-level tests for gzip if there is no support for it. - if ( defined($pgdump_runs{$run}->{compile_option}) - && $pgdump_runs{$run}->{compile_option} eq 'gzip' - && !$supports_gzip) + if ($pgdump_runs{$run}->{compile_option} && + ($pgdump_runs{$run}->{compile_option} eq 'gzip' && !$supports_gzip) || + ($pgdump_runs{$run}->{compile_option} eq 'lz4' && !$supports_lz4)) { - note "$run: skipped due to no gzip support"; + note "$run: skipped due to no $pgdump_runs{$run}->{compile_option} support"; next; } diff --git a/src/tools/pginclude/cpluspluscheck b/src/tools/pginclude/cpluspluscheck index da8e4861f4..1f7f817e4d 100755 --- a/src/tools/pginclude/cpluspluscheck +++ b/src/tools/pginclude/cpluspluscheck @@ -152,6 +152,7 @@ do # as field names, which is unfortunate but we won't change it now.
test "$f" = src/bin/pg_dump/compress_gzip.h && continue test "$f" = src/bin/pg_dump/compress_io.h && continue + test "$f" = src/bin/pg_dump/compress_lz4.h && continue test "$f" = src/bin/pg_dump/parallel.h && continue test "$f" = src/bin/pg_dump/pg_backup_archiver.h && continue test "$f" = src/bin/pg_dump/pg_dump.h && continue diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 840191d680..232228d427 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1381,6 +1381,7 @@ LWLock LWLockHandle LWLockMode LWLockPadded +LZ4CompressorState LZ4F_compressionContext_t LZ4F_decompressOptions_t LZ4F_decompressionContext_t -- 2.34.1