From c5e6d08fa3fe3febd5b6b31ad3bda109fa693167 Mon Sep 17 00:00:00 2001 From: Nikhil Kumar Veldanda Date: Thu, 5 Jun 2025 05:30:14 +0000 Subject: [PATCH v25 3/3] Implement Zstd compression (no dictionary support) --- contrib/amcheck/verify_heapam.c | 1 + doc/src/sgml/catalogs.sgml | 28 +- doc/src/sgml/config.sgml | 12 +- doc/src/sgml/ref/alter_table.sgml | 8 +- doc/src/sgml/ref/create_table.sgml | 13 +- src/backend/access/common/detoast.c | 12 +- src/backend/access/common/toast_compression.c | 162 +++++++- src/backend/access/common/toast_internals.c | 4 + src/backend/utils/adt/varlena.c | 3 + src/backend/utils/misc/guc_tables.c | 3 + src/backend/utils/misc/postgresql.conf.sample | 2 +- src/bin/pg_dump/pg_dump.c | 3 + src/bin/psql/describe.c | 5 +- src/bin/psql/tab-complete.in.c | 2 +- src/include/access/toast_compression.h | 13 +- src/include/access/toast_internals.h | 3 +- src/include/varatt.h | 3 +- .../regress/expected/compression_zstd.out | 376 ++++++++++++++++++ .../regress/expected/compression_zstd_1.out | 5 + src/test/regress/parallel_schedule | 2 +- src/test/regress/sql/compression_zstd.sql | 162 ++++++++ 21 files changed, 776 insertions(+), 46 deletions(-) create mode 100644 src/test/regress/expected/compression_zstd.out create mode 100644 src/test/regress/expected/compression_zstd_1.out create mode 100644 src/test/regress/sql/compression_zstd.sql diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index 2161d129502..b50f3b43951 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -1792,6 +1792,7 @@ check_tuple_attribute(HeapCheckContext *ctx) /* List of all valid compression method IDs */ case TOAST_PGLZ_COMPRESSION_ID: case TOAST_LZ4_COMPRESSION_ID: + case TOAST_ZSTD_COMPRESSION_ID: valid = true; break; diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index fa86c569dc4..ef37c9c4630 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1240,19 +1240,21 @@ 
- - attcompression char - - - The current compression method of the column. Typically this is - '\0' to specify use of the current default setting - (see ). Otherwise, - 'p' selects pglz compression, while - 'l' selects LZ4 - compression. However, this field is ignored - whenever attstorage does not allow - compression. - + + + attcompression char + + + The current compression method of the column. Typically this is + '\0' to specify use of the current default setting + (see ). Otherwise, + 'p' selects pglz compression, while + 'l' selects LZ4 compression, + and 'z' selects ZSTD compression. + However, this field is ignored whenever + attstorage does not allow compression. + + diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 021153b2a5f..11a76910539 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3402,8 +3402,8 @@ include_dir 'conf.d' A compressed page image will be decompressed during WAL replay. The supported methods are pglz, lz4 (if PostgreSQL - was compiled with ) and - zstd (if PostgreSQL + was compiled with ), + and zstd (if PostgreSQL was compiled with ). The default value is off. Only superusers and users with the appropriate SET @@ -9817,9 +9817,11 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; the COMPRESSION column option in CREATE TABLE or ALTER TABLE.) - The supported compression methods are pglz and - (if PostgreSQL was compiled with - ) lz4. + The supported compression methods are pglz, + lz4 (if PostgreSQL + was compiled with ), + and zstd (if PostgreSQL + was compiled with ). The default is pglz. diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index d63f3a621ac..cc32f4aa699 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -443,10 +443,10 @@ WITH ( MODULUS numeric_literal, REM its existing compression method, rather than being recompressed with the compression method of the target column. 
The supported compression - methods are pglz and lz4. - (lz4 is available only if - was used when building PostgreSQL.) In - addition, compression_method + methods are pglz, + lz4 (if PostgreSQL was compiled with ), + and zstd (if PostgreSQL was compiled with ). + In addition, compression_method can be default, which selects the default behavior of consulting the setting at the time of data insertion to determine the method to use. diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index 4a41b2f5530..a5149282b7a 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -337,16 +337,19 @@ WITH ( MODULUS numeric_literal, REM The COMPRESSION clause sets the compression method for the column. Compression is supported only for variable-width data types, and is used only when the column's storage mode is main or extended. (See for information on column storage modes.) Setting this property for a partitioned table has no direct effect, because such tables have no storage of their own, but the configured value will be inherited by newly-created partitions. - The supported compression methods are pglz and - lz4. (lz4 is available only if - was used when building - PostgreSQL.) In addition, + The supported compression methods are + pglz, + lz4 (if PostgreSQL + was compiled with ), + and zstd (if PostgreSQL + was compiled with ). + In addition, compression_method can be default to explicitly specify the default behavior, which is to consult the diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index 01419d1c65f..6a2e6c9683d 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -246,10 +246,10 @@ detoast_attr_slice(struct varlena *attr, * Determine maximum amount of compressed data needed for a prefix * of a given length (after decompression). 
* - * At least for now, if it's LZ4 data, we'll have to fetch the - * whole thing, because there doesn't seem to be an API call to - * determine how much compressed data we need to be sure of being - * able to decompress the required slice. + * At least for now, if it's LZ4 or Zstandard data, we'll have to + * fetch the whole thing, because there doesn't seem to be an API + * call to determine how much compressed data we need to be sure + * of being able to decompress the required slice. */ if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) == TOAST_PGLZ_COMPRESSION_ID) @@ -485,6 +485,8 @@ toast_decompress_datum(struct varlena *attr) return pglz_decompress_datum(attr); case TOAST_LZ4_COMPRESSION_ID: return lz4_decompress_datum(attr); + case TOAST_ZSTD_COMPRESSION_ID: + return zstd_decompress_datum(attr); default: elog(ERROR, "invalid compression method id %d", cmid); return NULL; /* keep compiler quiet */ @@ -528,6 +530,8 @@ toast_decompress_datum_slice(struct varlena *attr, int32 slicelength) return pglz_decompress_datum_slice(attr, slicelength); case TOAST_LZ4_COMPRESSION_ID: return lz4_decompress_datum_slice(attr, slicelength); + case TOAST_ZSTD_COMPRESSION_ID: + return zstd_decompress_datum_slice(attr, slicelength); default: elog(ERROR, "invalid compression method id %d", cmid); return NULL; /* keep compiler quiet */ diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c index fb93555bdb0..37c85c7fb18 100644 --- a/src/backend/access/common/toast_compression.c +++ b/src/backend/access/common/toast_compression.c @@ -17,6 +17,10 @@ #include #endif +#ifdef USE_ZSTD +#include +#endif + #include "access/detoast.h" #include "access/toast_compression.h" #include "common/pg_lzcompress.h" @@ -28,11 +32,19 @@ /* GUC */ int default_toast_compression = TOAST_PGLZ_COMPRESSION; -#define NO_LZ4_SUPPORT() \ +#ifdef USE_ZSTD +#define ZSTD_CHECK_ERROR(zstd_ret, msg) \ + do { \ + if (ZSTD_isError(zstd_ret)) \ + ereport(ERROR, 
(errmsg("%s: %s", (msg), ZSTD_getErrorName(zstd_ret)))); \
+	} while (0)
+#endif
+
+#define COMPRESSION_METHOD_NOT_SUPPORTED(method) \
 	ereport(ERROR, \
 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
-			 errmsg("compression method lz4 not supported"), \
-			 errdetail("This functionality requires the server to be built with lz4 support.")))
+			 errmsg("compression method %s not supported", method), \
+			 errdetail("This functionality requires the server to be built with %s support.", method)))
 
 /*
  * Compress a varlena using PGLZ.
@@ -142,7 +154,7 @@ struct varlena *
 lz4_compress_datum(const struct varlena *value)
 {
 #ifndef USE_LZ4
-	NO_LZ4_SUPPORT();
+	COMPRESSION_METHOD_NOT_SUPPORTED("lz4");
 	return NULL;				/* keep compiler quiet */
 #else
 	int32		valsize;
@@ -185,7 +197,7 @@ struct varlena *
 lz4_decompress_datum(const struct varlena *value)
 {
 #ifndef USE_LZ4
-	NO_LZ4_SUPPORT();
+	COMPRESSION_METHOD_NOT_SUPPORTED("lz4");
 	return NULL;				/* keep compiler quiet */
 #else
 	int32		rawsize;
@@ -218,7 +230,7 @@ struct varlena *
 lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
 {
 #ifndef USE_LZ4
-	NO_LZ4_SUPPORT();
+	COMPRESSION_METHOD_NOT_SUPPORTED("lz4");
 	return NULL;				/* keep compiler quiet */
 #else
 	int32		rawsize;
@@ -248,6 +260,172 @@ lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
 #endif
 }
 
+/*
+ * Compress a varlena using ZSTD.
+ *
+ * Returns the compressed varlena, or NULL if the compressed form would be
+ * larger than the input.
+ */
+struct varlena *
+zstd_compress_datum(const struct varlena *value)
+{
+#ifdef USE_ZSTD
+	uint32		valsize = VARSIZE_ANY_EXHDR(value);
+	size_t		max_size = ZSTD_compressBound(valsize);
+	struct varlena *compressed;
+	size_t		cmp_size;
+
+	/* Allocate space for the compressed varlena (header + data) */
+	compressed = (struct varlena *) palloc(max_size + VARHDRSZ_4BCE);
+
+	cmp_size = ZSTD_compress(VARDATA_4BCE(compressed),
+							 max_size,
+							 VARDATA_ANY(value),
+							 valsize,
+							 ZSTD_CLEVEL_DEFAULT);
+
+	if (ZSTD_isError(cmp_size))
+	{
+		pfree(compressed);
+		ZSTD_CHECK_ERROR(cmp_size, "ZSTD compression failed");
+	}
+
+	/*
+	 * If the compressed form is larger than the input, return NULL so that
+	 * the uncompressed data is stored instead.
+	 */
+	if (cmp_size > valsize)
+	{
+		pfree(compressed);
+		return NULL;
+	}
+
+	/* Set the compressed size in the varlena header */
+	SET_VARSIZE_COMPRESSED(compressed, cmp_size + VARHDRSZ_4BCE);
+
+	return compressed;
+#else
+	COMPRESSION_METHOD_NOT_SUPPORTED("zstd");
+	return NULL;				/* keep compiler quiet */
+#endif
+}
+
+/*
+ * Decompress a varlena that was compressed using ZSTD.
+ */
+struct varlena *
+zstd_decompress_datum(const struct varlena *value)
+{
+#ifdef USE_ZSTD
+	uint32		actual_size_exhdr = VARDATA_COMPRESSED_GET_EXTSIZE(value);
+	uint32		cmplen;
+	struct varlena *result;
+	size_t		ucmplen;
+
+	cmplen = VARSIZE_ANY(value) - VARHDRSZ_4BCE;
+
+	/* Allocate space for the uncompressed data */
+	result = (struct varlena *) palloc(actual_size_exhdr + VARHDRSZ);
+
+	ucmplen = ZSTD_decompress(VARDATA(result),
+							  actual_size_exhdr,
+							  VARDATA_4BCE(value),
+							  cmplen);
+
+	if (ZSTD_isError(ucmplen))
+	{
+		pfree(result);
+		ZSTD_CHECK_ERROR(ucmplen, "ZSTD decompression failed");
+	}
+
+	/* Set final size in the varlena header */
+	SET_VARSIZE(result, ucmplen + VARHDRSZ);
+
+	return result;
+#else
+	COMPRESSION_METHOD_NOT_SUPPORTED("zstd");
+	return NULL;				/* keep compiler quiet */
+#endif
+}
+
+/*
+ * Decompress the first slicelength bytes of a varlena that was compressed
+ * using ZSTD.
+ *
+ * The streaming API is used so that decompression can stop as soon as the
+ * requested number of bytes has been produced.
+ */
+struct varlena *
+zstd_decompress_datum_slice(const struct varlena *value, int32 slicelength)
+{
+#ifdef USE_ZSTD
+	struct varlena *result;
+	ZSTD_inBuffer inBuf;
+	ZSTD_outBuffer outBuf;
+	ZSTD_DCtx  *zstdDctx;
+	size_t		ret;
+
+	inBuf.src = VARDATA_4BCE(value);
+	inBuf.size = VARSIZE_ANY(value) - VARHDRSZ_4BCE;
+	inBuf.pos = 0;
+
+	/*
+	 * Allocate the result before creating the decompression context.  The
+	 * context is not palloc'd, so it would be leaked if palloc() threw an
+	 * error after the context had been created.
+	 */
+	result = (struct varlena *) palloc(slicelength + VARHDRSZ);
+	outBuf.dst = (char *) result + VARHDRSZ;
+	outBuf.size = slicelength;
+	outBuf.pos = 0;
+
+	zstdDctx = ZSTD_createDCtx();
+	if (zstdDctx == NULL)
+	{
+		pfree(result);
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory"),
+				 errdetail("Could not create zstd decompression context.")));
+	}
+
+	/* Stop once the slice is full or the compressed input is used up */
+	while (inBuf.pos < inBuf.size && outBuf.pos < outBuf.size)
+	{
+		ret = ZSTD_decompressStream(zstdDctx, &outBuf, &inBuf);
+		if (ZSTD_isError(ret))
+		{
+			pfree(result);
+			ZSTD_freeDCtx(zstdDctx);
+			ZSTD_CHECK_ERROR(ret, "zstd decompression failed");
+		}
+	}
+
+	ZSTD_freeDCtx(zstdDctx);
+
+	/*
+	 * The input ran out before the slice was filled.  Report that instead of
+	 * asserting or silently returning a short result; callers are expected
+	 * to ask for no more than the datum's raw size, so this indicates
+	 * damaged data.
+	 */
+	if (outBuf.pos != outBuf.size)
+	{
+		pfree(result);
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_CORRUPTED),
+				 errmsg_internal("compressed zstd data is corrupt")));
+	}
+
+	SET_VARSIZE(result, outBuf.pos + VARHDRSZ);
+
+	return result;
+#else
+	COMPRESSION_METHOD_NOT_SUPPORTED("zstd");
+	return NULL;				/* keep compiler quiet */
+#endif
+}
+
 /*
  * Extract compression ID from a varlena.
  *
@@ -292,10 +470,17 @@ CompressionNameToMethod(const char *compression)
 	else if (strcmp(compression, "lz4") == 0)
 	{
 #ifndef USE_LZ4
-		NO_LZ4_SUPPORT();
+		COMPRESSION_METHOD_NOT_SUPPORTED("lz4");
 #endif
 		return TOAST_LZ4_COMPRESSION;
 	}
+	else if (strcmp(compression, "zstd") == 0)
+	{
+#ifndef USE_ZSTD
+		COMPRESSION_METHOD_NOT_SUPPORTED("zstd");
+#endif
+		return TOAST_ZSTD_COMPRESSION;
+	}
 
 	return InvalidCompressionMethod;
 }
@@ -312,6 +497,8 @@ GetCompressionMethodName(char method)
 			return "pglz";
 		case TOAST_LZ4_COMPRESSION:
 			return "lz4";
+		case TOAST_ZSTD_COMPRESSION:
+			return "zstd";
 		default:
 			elog(ERROR, "invalid compression method %c", method);
 			return NULL;		/* keep compiler quiet */
diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c
index 32653af2e9e..500443a3535 100644
--- a/src/backend/access/common/toast_internals.c
+++ b/src/backend/access/common/toast_internals.c
@@ -71,6 +71,10 @@ toast_compress_datum(Datum value, char cmethod)
 			tmp = lz4_compress_datum((const struct varlena *) value);
 			cmid = TOAST_LZ4_COMPRESSION_ID;
 			break;
+		case TOAST_ZSTD_COMPRESSION:
+			tmp = zstd_compress_datum((const struct varlena *) value);
+			cmid = TOAST_ZSTD_COMPRESSION_ID;
+			break;
 		default:
 			elog(ERROR, "invalid compression method %c", cmethod);
 	}
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 3e4d5568bde..063780e56dc 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -5301,6 +5301,9 @@ pg_column_compression(PG_FUNCTION_ARGS)
 		case TOAST_LZ4_COMPRESSION_ID:
result = "lz4"; break; + case TOAST_ZSTD_COMPRESSION_ID: + result = "zstd"; + break; default: elog(ERROR, "invalid compression method id %d", cmid); } diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index f04bfedb2fd..4bc1a6029ec 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -460,6 +460,9 @@ static const struct config_enum_entry default_toast_compression_options[] = { {"pglz", TOAST_PGLZ_COMPRESSION, false}, #ifdef USE_LZ4 {"lz4", TOAST_LZ4_COMPRESSION, false}, +#endif +#ifdef USE_ZSTD + {"zstd", TOAST_ZSTD_COMPRESSION, false}, #endif {NULL, 0, false} }; diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 341f88adc87..b45b6e5f0ce 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -752,7 +752,7 @@ autovacuum_worker_slots = 16 # autovacuum worker slots to allocate #row_security = on #default_table_access_method = 'heap' #default_tablespace = '' # a tablespace name, '' uses the default -#default_toast_compression = 'pglz' # 'pglz' or 'lz4' +#default_toast_compression = 'pglz' # 'pglz' or 'lz4' or 'zstd' #temp_tablespaces = '' # a list of tablespace names, '' uses # only default tablespace #check_function_bodies = on diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 37432e66efd..6083ae1a6ad 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -17570,6 +17570,9 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) case 'l': cmname = "lz4"; break; + case 'z': + cmname = "zstd"; + break; default: cmname = NULL; break; diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 1d08268393e..26951f8f890 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -2171,8 +2171,9 @@ describeOneTableDetails(const char *schemaname, /* these strings are literal in our syntax, so not translated. 
*/ printTableAddCell(&cont, (compression[0] == 'p' ? "pglz" : (compression[0] == 'l' ? "lz4" : - (compression[0] == '\0' ? "" : - "???"))), + (compression[0] == 'z' ? "zstd" : + (compression[0] == '\0' ? "" : + "???")))), false, false); } diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index ec65ab79fec..a3ff8c1d9ae 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -2885,7 +2885,7 @@ match_previous_words(int pattern_id, /* ALTER TABLE ALTER [COLUMN] SET COMPRESSION */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET", "COMPRESSION") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET", "COMPRESSION")) - COMPLETE_WITH("DEFAULT", "PGLZ", "LZ4"); + COMPLETE_WITH("DEFAULT", "PGLZ", "LZ4", "ZSTD"); /* ALTER TABLE ALTER [COLUMN] SET EXPRESSION */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET", "EXPRESSION") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET", "EXPRESSION")) diff --git a/src/include/access/toast_compression.h b/src/include/access/toast_compression.h index 62b77edf372..0d7b521c481 100644 --- a/src/include/access/toast_compression.h +++ b/src/include/access/toast_compression.h @@ -13,6 +13,10 @@ #ifndef TOAST_COMPRESSION_H #define TOAST_COMPRESSION_H +#ifdef USE_ZSTD +#include +#endif + /* * GUC support. 
* @@ -43,7 +47,8 @@ typedef enum ToastCompressionId { TOAST_PGLZ_COMPRESSION_ID = 0, TOAST_LZ4_COMPRESSION_ID = 1, - TOAST_INVALID_COMPRESSION_ID = 2, + TOAST_ZSTD_COMPRESSION_ID = 2, + TOAST_INVALID_COMPRESSION_ID = 3, } ToastCompressionId; /* @@ -53,6 +58,7 @@ typedef enum ToastCompressionId */ #define TOAST_PGLZ_COMPRESSION 'p' #define TOAST_LZ4_COMPRESSION 'l' +#define TOAST_ZSTD_COMPRESSION 'z' #define InvalidCompressionMethod '\0' #define CompressionMethodIsValid(cm) ((cm) != InvalidCompressionMethod) @@ -73,6 +79,11 @@ extern struct varlena *lz4_decompress_datum(const struct varlena *value); extern struct varlena *lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength); +/* zstd nodict compression/decompression routines */ +extern struct varlena *zstd_compress_datum(const struct varlena *value); +extern struct varlena *zstd_decompress_datum(const struct varlena *value); +extern struct varlena *zstd_decompress_datum_slice(const struct varlena *value, int32 slicelength); + /* other stuff */ extern ToastCompressionId toast_get_compression_id(struct varlena *attr); extern char CompressionNameToMethod(const char *compression); diff --git a/src/include/access/toast_internals.h b/src/include/access/toast_internals.h index 857b53431c8..35277086f52 100644 --- a/src/include/access/toast_internals.h +++ b/src/include/access/toast_internals.h @@ -25,7 +25,8 @@ do { \ Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \ Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \ - (cm_method) == TOAST_LZ4_COMPRESSION_ID); \ + (cm_method) == TOAST_LZ4_COMPRESSION_ID || \ + (cm_method) == TOAST_ZSTD_COMPRESSION_ID); \ if (!TOAST_CMPID_EXTENDED((cm_method))) \ ((varattrib_4b *)(ptr))->va_compressed.va_tcinfo = ((uint32)(len)) | ((uint32)(cm_method) << VARLENA_EXTSIZE_BITS); \ else \ diff --git a/src/include/varatt.h b/src/include/varatt.h index 39dcfc4b4b8..4ca8dac814f 100644 --- a/src/include/varatt.h +++ b/src/include/varatt.h @@ -401,7 +401,8 @@ typedef 
struct #define VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, len, cm) \ do { \ Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \ - (cm) == TOAST_LZ4_COMPRESSION_ID); \ + (cm) == TOAST_LZ4_COMPRESSION_ID || \ + (cm) == TOAST_ZSTD_COMPRESSION_ID); \ if (!TOAST_CMPID_EXTENDED((cm))) \ /* method fits in the low bits of va_extinfo */ \ (toast_pointer).va_extinfo = (uint32)(len) | ((uint32) (cm) << VARLENA_EXTSIZE_BITS); \ diff --git a/src/test/regress/expected/compression_zstd.out b/src/test/regress/expected/compression_zstd.out new file mode 100644 index 00000000000..97222b20a28 --- /dev/null +++ b/src/test/regress/expected/compression_zstd.out @@ -0,0 +1,376 @@ +SELECT NOT(pg_compression_available('zstd')) AS skip_test \gset +\if :skip_test + \echo '*** skipping zstd tests (zstd not available) ***' + \quit +\endif +CREATE SCHEMA zstd; +SET search_path TO zstd, public; +\set HIDE_TOAST_COMPRESSION false +-- ensure we get stable results regardless of installation's default +SET default_toast_compression = 'zstd'; +-- test creating table with compression method +CREATE TABLE cmdata(f1 text COMPRESSION pglz); +CREATE INDEX idx ON cmdata(f1); +INSERT INTO cmdata VALUES(repeat('1234567890', 1000)); +\d+ cmdata + Table "zstd.cmdata" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | pglz | | +Indexes: + "idx" btree (f1) + +CREATE TABLE cmdata1(f1 TEXT COMPRESSION zstd); +INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); -- inline +INSERT INTO cmdata1 VALUES (repeat('1234567890', 2500000)); -- externally stored +\d+ cmdata1 + Table "zstd.cmdata1" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended 
| zstd | | + +-- verify stored compression method in the data +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz +(1 row) + +SELECT pg_column_compression(f1) FROM cmdata1; + pg_column_compression +----------------------- + zstd + zstd +(2 rows) + +-- decompress data slice +SELECT SUBSTR(f1, 200, 5) FROM cmdata; + substr +-------- + 01234 +(1 row) + +SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; + substr +---------------------------------------------------- + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 +(2 rows) + +-- copy with table creation +SELECT * INTO cmmove1 FROM cmdata; +\d+ cmmove1 + Table "zstd.cmmove1" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | | | + +SELECT pg_column_compression(f1) FROM cmmove1; + pg_column_compression +----------------------- + pglz +(1 row) + +-- copy to existing table +CREATE TABLE cmmove3(f1 text COMPRESSION pglz); +INSERT INTO cmmove3 SELECT * FROM cmdata; +INSERT INTO cmmove3 SELECT * FROM cmdata1; +SELECT pg_column_compression(f1) FROM cmmove3; + pg_column_compression +----------------------- + pglz + zstd + zstd +(3 rows) + +-- test LIKE INCLUDING COMPRESSION +CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); +\d+ cmdata2 + Table "zstd.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | zstd | | + +DROP TABLE cmdata2; +-- try setting compression for incompressible data type +CREATE TABLE cmdata2 (f1 int COMPRESSION pglz); +ERROR: column data type integer does not support compression +-- update using datum from different table +CREATE 
TABLE cmmove2(f1 text COMPRESSION pglz); +INSERT INTO cmmove2 VALUES (repeat('1234567890', 1004)); +SELECT pg_column_compression(f1) FROM cmmove2; + pg_column_compression +----------------------- + pglz +(1 row) + +UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; +SELECT pg_column_compression(f1) FROM cmmove2; + pg_column_compression +----------------------- + zstd +(1 row) + +-- test externally stored compressed data +CREATE OR REPLACE FUNCTION large_val() RETURNS TEXT LANGUAGE SQL AS +'select array_agg(fipshash(g::text))::text from generate_series(1, 256) g'; +CREATE TABLE cmdata2 (f1 text COMPRESSION pglz); +INSERT INTO cmdata2 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata2; + pg_column_compression +----------------------- + pglz +(1 row) + +INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata1; + pg_column_compression +----------------------- + zstd + zstd + zstd +(3 rows) + +SELECT SUBSTR(f1, 200, 5) FROM cmdata1; + substr +-------- + 01234 + 01234 + 79026 +(3 rows) + +SELECT SUBSTR(f1, 200, 5) FROM cmdata2; + substr +-------- + 79026 +(1 row) + +DROP TABLE cmdata2; +--test column type update varlena/non-varlena +CREATE TABLE cmdata2 (f1 int); +\d+ cmdata2 + Table "zstd.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+---------+-----------+----------+---------+---------+-------------+--------------+------------- + f1 | integer | | | | plain | | | + +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +\d+ cmdata2 + Table "zstd.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | character varying | | | | extended | | | + +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; +\d+ cmdata2 + Table "zstd.cmdata2" 
+ Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+---------+-----------+----------+---------+---------+-------------+--------------+------------- + f1 | integer | | | | plain | | | + +--changing column storage should not impact the compression method +--but the data should not be compressed +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION pglz; +\d+ cmdata2 + Table "zstd.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | character varying | | | | extended | pglz | | + +ALTER TABLE cmdata2 ALTER COLUMN f1 SET STORAGE plain; +\d+ cmdata2 + Table "zstd.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+-------------------+-----------+----------+---------+---------+-------------+--------------+------------- + f1 | character varying | | | | plain | pglz | | + +INSERT INTO cmdata2 VALUES (repeat('123456789', 800)); +SELECT pg_column_compression(f1) FROM cmdata2; + pg_column_compression +----------------------- + +(1 row) + +-- test compression with materialized view +CREATE MATERIALIZED VIEW compressmv(x) AS SELECT * FROM cmdata1; +\d+ compressmv + Materialized view "zstd.compressmv" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + x | text | | | | extended | | | +View definition: + SELECT f1 AS x + FROM cmdata1; + +SELECT pg_column_compression(f1) FROM cmdata1; + pg_column_compression +----------------------- + zstd + zstd + zstd +(3 rows) + +SELECT pg_column_compression(x) FROM compressmv; + pg_column_compression +----------------------- + zstd + 
zstd + zstd +(3 rows) + +-- test compression with partition +CREATE TABLE cmpart(f1 text COMPRESSION zstd) PARTITION BY HASH(f1); +CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE cmpart2(f1 text COMPRESSION pglz); +ALTER TABLE cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; + pg_column_compression +----------------------- + zstd +(1 row) + +SELECT pg_column_compression(f1) FROM cmpart2; + pg_column_compression +----------------------- + pglz +(1 row) + +-- test compression with inheritance +CREATE TABLE cminh() INHERITS(cmdata, cmdata1); -- error +NOTICE: merging multiple inherited definitions of column "f1" +ERROR: column "f1" has a compression method conflict +DETAIL: pglz versus zstd +CREATE TABLE cminh(f1 TEXT COMPRESSION zstd) INHERITS(cmdata); -- error +NOTICE: merging column "f1" with inherited definition +ERROR: column "f1" has a compression method conflict +DETAIL: pglz versus zstd +CREATE TABLE cmdata3(f1 text); +CREATE TABLE cminh() INHERITS (cmdata, cmdata3); +NOTICE: merging multiple inherited definitions of column "f1" +-- test default_toast_compression GUC +SET default_toast_compression = 'zstd'; +-- test alter compression method +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION zstd; +INSERT INTO cmdata VALUES (repeat('123456789', 4004)); +\d+ cmdata + Table "zstd.cmdata" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | zstd | | +Indexes: + "idx" btree (f1) +Child tables: cminh + +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz + zstd +(2 rows) + +ALTER TABLE cmdata2 ALTER COLUMN 
f1 SET COMPRESSION default; +\d+ cmdata2 + Table "zstd.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+-------------------+-----------+----------+---------+---------+-------------+--------------+------------- + f1 | character varying | | | | plain | | | + +-- test alter compression method for materialized views +ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION zstd; +\d+ compressmv + Materialized view "zstd.compressmv" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + x | text | | | | extended | zstd | | +View definition: + SELECT f1 AS x + FROM cmdata1; + +-- test alter compression method for partitioned tables +ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; +ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION zstd; +-- new data should be compressed with the current compression method +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; + pg_column_compression +----------------------- + zstd + pglz +(2 rows) + +SELECT pg_column_compression(f1) FROM cmpart2; + pg_column_compression +----------------------- + pglz + zstd +(2 rows) + +-- VACUUM FULL does not recompress +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz + zstd +(2 rows) + +VACUUM FULL cmdata; +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz + zstd +(2 rows) + +-- test expression index +DROP TABLE cmdata2; +CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION zstd); +CREATE UNIQUE INDEX idx1 ON cmdata2 ((f1 || f2)); +INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEXT))::TEXT FROM +generate_series(1, 50) g), 
VERSION()); +-- check data is ok +SELECT length(f1) FROM cmdata; + length +-------- + 10000 + 36036 +(2 rows) + +SELECT length(f1) FROM cmdata1; + length +---------- + 10040 + 25000000 + 12449 +(3 rows) + +SELECT length(f1) FROM cmmove1; + length +-------- + 10000 +(1 row) + +SELECT length(f1) FROM cmmove2; + length +-------- + 10040 +(1 row) + +SELECT length(f1) FROM cmmove3; + length +---------- + 10000 + 10040 + 25000000 +(3 rows) + +CREATE TABLE badcompresstbl (a text COMPRESSION I_Do_Not_Exist_Compression); -- fails +ERROR: invalid compression method "i_do_not_exist_compression" +CREATE TABLE badcompresstbl (a text); +ALTER TABLE badcompresstbl ALTER a SET COMPRESSION I_Do_Not_Exist_Compression; -- fails +ERROR: invalid compression method "i_do_not_exist_compression" +DROP TABLE badcompresstbl; +\set HIDE_TOAST_COMPRESSION true diff --git a/src/test/regress/expected/compression_zstd_1.out b/src/test/regress/expected/compression_zstd_1.out new file mode 100644 index 00000000000..6ad1a812533 --- /dev/null +++ b/src/test/regress/expected/compression_zstd_1.out @@ -0,0 +1,5 @@ +SELECT NOT(pg_compression_available('zstd')) AS skip_test \gset +\if :skip_test + \echo '*** skipping zstd tests (zstd not available) ***' +*** skipping zstd tests (zstd not available) *** + \quit diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index fbffc67ae60..1ef4797cd10 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -123,7 +123,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # The stats test resets stats, so nothing else needing stats access can be in # this group. 
# ---------- -test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 memoize stats predicate numa +test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 compression_zstd memoize stats predicate numa # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL diff --git a/src/test/regress/sql/compression_zstd.sql b/src/test/regress/sql/compression_zstd.sql new file mode 100644 index 00000000000..ec709387a17 --- /dev/null +++ b/src/test/regress/sql/compression_zstd.sql @@ -0,0 +1,162 @@ +SELECT NOT(pg_compression_available('zstd')) AS skip_test \gset +\if :skip_test + \echo '*** skipping zstd tests (zstd not available) ***' + \quit +\endif + +CREATE SCHEMA zstd; +SET search_path TO zstd, public; + +\set HIDE_TOAST_COMPRESSION false + +-- ensure we get stable results regardless of installation's default +SET default_toast_compression = 'zstd'; + +-- test creating table with compression method +CREATE TABLE cmdata(f1 text COMPRESSION pglz); +CREATE INDEX idx ON cmdata(f1); +INSERT INTO cmdata VALUES(repeat('1234567890', 1000)); +\d+ cmdata +CREATE TABLE cmdata1(f1 TEXT COMPRESSION zstd); +INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); -- inline +INSERT INTO cmdata1 VALUES (repeat('1234567890', 2500000)); -- externally stored +\d+ cmdata1 + +-- verify stored compression method in the data +SELECT pg_column_compression(f1) FROM cmdata; +SELECT pg_column_compression(f1) FROM cmdata1; + +-- decompress data slice +SELECT SUBSTR(f1, 200, 5) FROM cmdata; +SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; + +-- copy with table creation +SELECT * INTO cmmove1 FROM cmdata; +\d+ cmmove1 +SELECT pg_column_compression(f1) FROM cmmove1; + +-- copy to existing table +CREATE TABLE cmmove3(f1 text COMPRESSION pglz); +INSERT INTO cmmove3 SELECT * FROM cmdata; +INSERT 
INTO cmmove3 SELECT * FROM cmdata1; +SELECT pg_column_compression(f1) FROM cmmove3; + +-- test LIKE INCLUDING COMPRESSION +CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); +\d+ cmdata2 +DROP TABLE cmdata2; + +-- try setting compression for incompressible data type +CREATE TABLE cmdata2 (f1 int COMPRESSION pglz); + +-- update using datum from different table +CREATE TABLE cmmove2(f1 text COMPRESSION pglz); +INSERT INTO cmmove2 VALUES (repeat('1234567890', 1004)); +SELECT pg_column_compression(f1) FROM cmmove2; +UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; +SELECT pg_column_compression(f1) FROM cmmove2; + +-- test externally stored compressed data +CREATE OR REPLACE FUNCTION large_val() RETURNS TEXT LANGUAGE SQL AS +'select array_agg(fipshash(g::text))::text from generate_series(1, 256) g'; +CREATE TABLE cmdata2 (f1 text COMPRESSION pglz); +INSERT INTO cmdata2 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata2; +INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata1; +SELECT SUBSTR(f1, 200, 5) FROM cmdata1; +SELECT SUBSTR(f1, 200, 5) FROM cmdata2; +DROP TABLE cmdata2; + +--test column type update varlena/non-varlena +CREATE TABLE cmdata2 (f1 int); +\d+ cmdata2 +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +\d+ cmdata2 +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; +\d+ cmdata2 + +--changing column storage should not impact the compression method +--but the data should not be compressed +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION pglz; +\d+ cmdata2 +ALTER TABLE cmdata2 ALTER COLUMN f1 SET STORAGE plain; +\d+ cmdata2 +INSERT INTO cmdata2 VALUES (repeat('123456789', 800)); +SELECT pg_column_compression(f1) FROM cmdata2; + +-- test compression with materialized view +CREATE MATERIALIZED VIEW compressmv(x) AS SELECT * FROM cmdata1; +\d+ compressmv +SELECT pg_column_compression(f1) FROM 
cmdata1; +SELECT pg_column_compression(x) FROM compressmv; + +-- test compression with partition +CREATE TABLE cmpart(f1 text COMPRESSION zstd) PARTITION BY HASH(f1); +CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE cmpart2(f1 text COMPRESSION pglz); + +ALTER TABLE cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; +SELECT pg_column_compression(f1) FROM cmpart2; + +-- test compression with inheritance +CREATE TABLE cminh() INHERITS(cmdata, cmdata1); -- error +CREATE TABLE cminh(f1 TEXT COMPRESSION zstd) INHERITS(cmdata); -- error +CREATE TABLE cmdata3(f1 text); +CREATE TABLE cminh() INHERITS (cmdata, cmdata3); + +-- test default_toast_compression GUC +SET default_toast_compression = 'zstd'; + +-- test alter compression method +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION zstd; +INSERT INTO cmdata VALUES (repeat('123456789', 4004)); +\d+ cmdata +SELECT pg_column_compression(f1) FROM cmdata; + +ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION default; +\d+ cmdata2 + +-- test alter compression method for materialized views +ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION zstd; +\d+ compressmv + +-- test alter compression method for partitioned tables +ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; +ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION zstd; + +-- new data should be compressed with the current compression method +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; +SELECT pg_column_compression(f1) FROM cmpart2; + +-- VACUUM FULL does not recompress +SELECT pg_column_compression(f1) FROM cmdata; +VACUUM FULL cmdata; +SELECT pg_column_compression(f1) FROM cmdata; + +-- test expression 
index +DROP TABLE cmdata2; +CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION zstd); +CREATE UNIQUE INDEX idx1 ON cmdata2 ((f1 || f2)); +INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEXT))::TEXT FROM +generate_series(1, 50) g), VERSION()); + +-- check data is ok +SELECT length(f1) FROM cmdata; +SELECT length(f1) FROM cmdata1; +SELECT length(f1) FROM cmmove1; +SELECT length(f1) FROM cmmove2; +SELECT length(f1) FROM cmmove3; + +CREATE TABLE badcompresstbl (a text COMPRESSION I_Do_Not_Exist_Compression); -- fails +CREATE TABLE badcompresstbl (a text); +ALTER TABLE badcompresstbl ALTER a SET COMPRESSION I_Do_Not_Exist_Compression; -- fails +DROP TABLE badcompresstbl; + +\set HIDE_TOAST_COMPRESSION true -- 2.47.1