From da304477b42e84f5af59c8fedf0e7bdf4442fbc3 Mon Sep 17 00:00:00 2001 From: dilipkumar Date: Mon, 7 Dec 2020 15:26:57 +0530 Subject: [PATCH v16 4/6] Create custom compression methods Provide syntax to create custom compression methods. --- doc/src/sgml/ref/alter_table.sgml | 17 +++--- doc/src/sgml/ref/create_access_method.sgml | 13 ++-- doc/src/sgml/ref/create_table.sgml | 11 ++-- src/backend/access/common/detoast.c | 59 +++++++++++++++++-- src/backend/access/common/toast_internals.c | 19 +++++- src/backend/access/compression/compress_lz4.c | 21 +++---- .../access/compression/compress_pglz.c | 20 +++---- .../access/compression/compressamapi.c | 47 +++++++++++++++ src/backend/commands/amcmds.c | 5 ++ src/backend/executor/nodeModifyTable.c | 2 +- src/backend/parser/gram.y | 1 + src/backend/utils/adt/varlena.c | 7 +-- src/bin/pg_dump/pg_dump.c | 3 + src/bin/psql/tab-complete.c | 6 ++ src/include/access/compressamapi.h | 16 +++-- src/include/access/detoast.h | 8 +++ src/include/access/toast_internals.h | 15 +++++ src/test/regress/expected/compression.out | 41 ++++++++++++- src/test/regress/sql/compression.sql | 11 ++++ 19 files changed, 264 insertions(+), 58 deletions(-) diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index 2fb9cfaadc..9efbc2352c 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -390,14 +390,15 @@ WITH ( MODULUS numeric_literal, REM - This clause adds compression to a column. Compression method can be set - from available built-in compression methods. The available built-in - methods are pglz and lz4. The - PRESERVE list contains list of compression methods used on the column and - determines which of them should be kept on the column. Without PRESERVE or - if all the previous compression methods are not preserved then the table - will be rewritten. If PRESERVE ALL is specified then all the previous - methods will be preserved and the table will not be rewritten. 
+ This clause adds compression to a column. Compression method + could be created with or it can + be set from the available built-in compression methods. The available + built-in methods are pglz and lz4. + The PRESERVE list contains list of compression methods used on the column + and determines which of them should be kept on the column. Without + PRESERVE or if all the previous compression methods are not preserved then + the table will be rewritten. If PRESERVE ALL is specified then all the + previous methods will be preserved and the table will not be rewritten. diff --git a/doc/src/sgml/ref/create_access_method.sgml b/doc/src/sgml/ref/create_access_method.sgml index dae43dbaed..79f1290a58 100644 --- a/doc/src/sgml/ref/create_access_method.sgml +++ b/doc/src/sgml/ref/create_access_method.sgml @@ -61,7 +61,7 @@ CREATE ACCESS METHOD name This clause specifies the type of access method to define. - Only TABLE and INDEX + TABLE, INDEX and COMPRESSION are supported at present. @@ -77,12 +77,15 @@ CREATE ACCESS METHOD name declared to take a single argument of type internal, and its return type depends on the type of access method; for TABLE access methods, it must - be table_am_handler and for INDEX - access methods, it must be index_am_handler. + be table_am_handler, for INDEX + access methods, it must be index_am_handler and + for COMPRESSION access methods, it must be + compression_am_handler. The C-level API that the handler function must implement varies depending on the type of access method. The table access method API - is described in and the index access method - API is described in . + is described in , the index access method + API is described in and the compression access + method API is described in . 
diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index f404dd1088..ade3989d75 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -999,11 +999,12 @@ WITH ( MODULUS numeric_literal, REM This clause adds the compression method to a column. Compression method - can be set from the available built-in compression methods. The available - options are pglz and lz4. If the - compression method is not sepcified for the compressible type then it will - have the default compression method. The default compression method is - pglz. + could be created with or it can + be set from the available built-in compression methods. The available + built-in methods are pglz and lz4. + If the compression method is not specified for the compressible type then + it will have the default compression method. The default compression + method is pglz. diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index d66d733da6..f6c48ac955 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -438,13 +438,44 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, return result; } +/* ---------- + * toast_get_compression_oid - + * + * Return the Oid of the compression method stored in the compressed data. + * For built-in methods, we only store the built-in compression method id in + * first 2-bits of the rawsize and that is directly mapped to the compression + * method Oid. And, for the custom compression method we store the Oid of the + * compression method in the custom compression header. + */ +Oid +toast_get_compression_oid(struct varlena *attr) +{ + CompressionId cmid; + + /* + * If it is custom compression id then get the Oid from the custom + * compression header otherwise, directly translate the built-in compression + * id to compression method Oid. 
+ */ + cmid = TOAST_COMPRESS_METHOD(attr); + if (IsCustomCompression(cmid)) + { + toast_compress_header_custom *hdr; + + hdr = (toast_compress_header_custom *) attr; + return hdr->cmoid; + } + else + return CompressionIdToOid(cmid); +} + /* ---------- * toast_get_compression_handler - get the compression handler routines * * helper function for toast_decompress_datum and toast_decompress_datum_slice */ static inline const CompressionAmRoutine * -toast_get_compression_handler(struct varlena *attr) +toast_get_compression_handler(struct varlena *attr, int32 *header_size) { const CompressionAmRoutine *cmroutine; CompressionId cmid; @@ -458,10 +489,21 @@ toast_get_compression_handler(struct varlena *attr) { case PGLZ_COMPRESSION_ID: cmroutine = &pglz_compress_methods; + *header_size = TOAST_COMPRESS_HDRSZ; break; case LZ4_COMPRESSION_ID: cmroutine = &lz4_compress_methods; + *header_size = TOAST_COMPRESS_HDRSZ; break; + case CUSTOM_COMPRESSION_ID: + { + toast_compress_header_custom *hdr; + + hdr = (toast_compress_header_custom *) attr; + cmroutine = GetCompressionAmRoutineByAmId(hdr->cmoid); + *header_size = TOAST_CUSTOM_COMPRESS_HDRSZ; + break; + } default: elog(ERROR, "Invalid compression method id %d", cmid); } @@ -477,9 +519,11 @@ toast_get_compression_handler(struct varlena *attr) static struct varlena * toast_decompress_datum(struct varlena *attr) { - const CompressionAmRoutine *cmroutine = toast_get_compression_handler(attr); + int32 header_size; + const CompressionAmRoutine *cmroutine = + toast_get_compression_handler(attr, &header_size); - return cmroutine->datum_decompress(attr); + return cmroutine->datum_decompress(attr, header_size); } @@ -493,16 +537,19 @@ toast_decompress_datum(struct varlena *attr) static struct varlena * toast_decompress_datum_slice(struct varlena *attr, int32 slicelength) { - const CompressionAmRoutine *cmroutine = toast_get_compression_handler(attr); + int32 header_size; + const CompressionAmRoutine *cmroutine = + 
toast_get_compression_handler(attr, &header_size); /* * If the handler supports the slice decompression then decompress the * slice otherwise decompress complete data. */ if (cmroutine->datum_decompress_slice) - return cmroutine->datum_decompress_slice(attr, slicelength); + return cmroutine->datum_decompress_slice(attr, header_size, + slicelength); else - return cmroutine->datum_decompress(attr); + return cmroutine->datum_decompress(attr, header_size); } /* ---------- diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index 72bf23f712..cd91aefef7 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -48,6 +48,7 @@ toast_compress_datum(Datum value, Oid cmoid) { struct varlena *tmp = NULL; int32 valsize; + bool isCustomCompression = false; const CompressionAmRoutine *cmroutine = NULL; Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value))); @@ -65,11 +66,16 @@ toast_compress_datum(Datum value, Oid cmoid) cmroutine = &lz4_compress_methods; break; default: - elog(ERROR, "Invalid compression method oid %u", cmoid); + isCustomCompression = true; + cmroutine = GetCompressionAmRoutineByAmId(cmoid); + break; } /* Call the actual compression function */ - tmp = cmroutine->datum_compress((const struct varlena *) value); + tmp = cmroutine->datum_compress((const struct varlena *)value, + isCustomCompression ? + TOAST_CUSTOM_COMPRESS_HDRSZ : + TOAST_COMPRESS_HDRSZ); if (!tmp) return PointerGetDatum(NULL); @@ -88,7 +94,14 @@ toast_compress_datum(Datum value, Oid cmoid) if (VARSIZE(tmp) < valsize - 2) { /* successful compression */ - TOAST_COMPRESS_SET_SIZE_AND_METHOD(tmp, valsize, CompressionOidToId(cmoid)); + TOAST_COMPRESS_SET_SIZE_AND_METHOD(tmp, valsize, isCustomCompression ? 
+ CUSTOM_COMPRESSION_ID : + CompressionOidToId(cmoid)); + + /* For custom compression, set the oid of the compression method */ + if (isCustomCompression) + TOAST_COMPRESS_SET_CMOID(tmp, cmoid); + return PointerGetDatum(tmp); } else diff --git a/src/backend/access/compression/compress_lz4.c b/src/backend/access/compression/compress_lz4.c index b455367be3..ce27616116 100644 --- a/src/backend/access/compression/compress_lz4.c +++ b/src/backend/access/compression/compress_lz4.c @@ -29,7 +29,7 @@ * compressed varlena, or NULL if compression fails. */ static struct varlena * -lz4_cmcompress(const struct varlena *value) +lz4_cmcompress(const struct varlena *value, int32 header_size) { #ifndef HAVE_LIBLZ4 ereport(ERROR, @@ -44,10 +44,10 @@ lz4_cmcompress(const struct varlena *value) valsize = VARSIZE_ANY_EXHDR(value); max_size = LZ4_compressBound(VARSIZE_ANY_EXHDR(value)); - tmp = (struct varlena *) palloc(max_size + TOAST_COMPRESS_HDRSZ); + tmp = (struct varlena *) palloc(max_size + header_size); len = LZ4_compress_default(VARDATA_ANY(value), - (char *) tmp + TOAST_COMPRESS_HDRSZ, + (char *) tmp + header_size, valsize, max_size); if (len <= 0) { @@ -55,7 +55,7 @@ lz4_cmcompress(const struct varlena *value) elog(ERROR, "lz4: could not compress data"); } - SET_VARSIZE_COMPRESSED(tmp, len + TOAST_COMPRESS_HDRSZ); + SET_VARSIZE_COMPRESSED(tmp, len + header_size); return tmp; #endif @@ -67,7 +67,7 @@ lz4_cmcompress(const struct varlena *value) * Returns the decompressed varlena. 
*/ static struct varlena * -lz4_cmdecompress(const struct varlena *value) +lz4_cmdecompress(const struct varlena *value, int32 header_size) { #ifndef HAVE_LIBLZ4 ereport(ERROR, @@ -80,9 +80,9 @@ lz4_cmdecompress(const struct varlena *value) result = (struct varlena *) palloc(TOAST_COMPRESS_RAWSIZE(value) + VARHDRSZ); SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(value) + VARHDRSZ); - rawsize = LZ4_decompress_safe(TOAST_COMPRESS_RAWDATA(value), + rawsize = LZ4_decompress_safe((char *) value + header_size, VARDATA(result), - VARSIZE(value) - TOAST_COMPRESS_HDRSZ, + VARSIZE(value) - header_size, TOAST_COMPRESS_RAWSIZE(value)); if (rawsize < 0) elog(ERROR, "lz4: compressed data is corrupted"); @@ -99,7 +99,8 @@ lz4_cmdecompress(const struct varlena *value) * Decompresses part of the data. Returns the decompressed varlena. */ static struct varlena * -lz4_cmdecompress_slice(const struct varlena *value, int32 slicelength) +lz4_cmdecompress_slice(const struct varlena *value, int32 header_size, + int32 slicelength) { #ifndef HAVE_LIBLZ4 ereport(ERROR, @@ -111,9 +112,9 @@ lz4_cmdecompress_slice(const struct varlena *value, int32 slicelength) result = (struct varlena *) palloc(TOAST_COMPRESS_RAWSIZE(value) + VARHDRSZ); - rawsize = LZ4_decompress_safe_partial(TOAST_COMPRESS_RAWDATA(value), + rawsize = LZ4_decompress_safe_partial((char *) value + header_size, VARDATA(result), - VARSIZE(value) - TOAST_COMPRESS_HDRSZ, + VARSIZE(value) - header_size, slicelength, TOAST_COMPRESS_RAWSIZE(value)); if (rawsize < 0) diff --git a/src/backend/access/compression/compress_pglz.c b/src/backend/access/compression/compress_pglz.c index 2a3ef17842..d693c880fd 100644 --- a/src/backend/access/compression/compress_pglz.c +++ b/src/backend/access/compression/compress_pglz.c @@ -27,7 +27,7 @@ * compressed varlena, or NULL if compression fails. 
*/ static struct varlena * -pglz_cmcompress(const struct varlena *value) +pglz_cmcompress(const struct varlena *value, int32 header_size) { int32 valsize, len; @@ -44,16 +44,16 @@ pglz_cmcompress(const struct varlena *value) return NULL; tmp = (struct varlena *) palloc(PGLZ_MAX_OUTPUT(valsize) + - TOAST_COMPRESS_HDRSZ); + header_size); len = pglz_compress(VARDATA_ANY(DatumGetPointer(value)), valsize, - TOAST_COMPRESS_RAWDATA(tmp), + (char *) tmp + header_size, NULL); if (len >= 0) { - SET_VARSIZE_COMPRESSED(tmp, len + TOAST_COMPRESS_HDRSZ); + SET_VARSIZE_COMPRESSED(tmp, len + header_size); return tmp; } @@ -68,7 +68,7 @@ pglz_cmcompress(const struct varlena *value) * Returns the decompressed varlena. */ static struct varlena * -pglz_cmdecompress(const struct varlena *value) +pglz_cmdecompress(const struct varlena *value, int32 header_size) { struct varlena *result; int32 rawsize; @@ -76,8 +76,8 @@ pglz_cmdecompress(const struct varlena *value) result = (struct varlena *) palloc(TOAST_COMPRESS_RAWSIZE(value) + VARHDRSZ); SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(value) + VARHDRSZ); - rawsize = pglz_decompress(TOAST_COMPRESS_RAWDATA(value), - TOAST_COMPRESS_SIZE(value), + rawsize = pglz_decompress((char *) value + header_size, + VARSIZE(value) - header_size, VARDATA(result), TOAST_COMPRESS_RAWSIZE(value), true); @@ -95,7 +95,7 @@ pglz_cmdecompress(const struct varlena *value) * Decompresses part of the data. Returns the decompressed varlena. 
*/ static struct varlena * -pglz_cmdecompress_slice(const struct varlena *value, +pglz_cmdecompress_slice(const struct varlena *value, int32 header_size, int32 slicelength) { struct varlena *result; @@ -103,8 +103,8 @@ pglz_cmdecompress_slice(const struct varlena *value, result = (struct varlena *) palloc(slicelength + VARHDRSZ); - rawsize = pglz_decompress(TOAST_COMPRESS_RAWDATA(value), - VARSIZE(value) - TOAST_COMPRESS_HDRSZ, + rawsize = pglz_decompress((char *) value + header_size, + VARSIZE(value) - header_size, VARDATA(result), slicelength, false); diff --git a/src/backend/access/compression/compressamapi.c b/src/backend/access/compression/compressamapi.c index 663102c8d2..9d96fad31f 100644 --- a/src/backend/access/compression/compressamapi.c +++ b/src/backend/access/compression/compressamapi.c @@ -19,6 +19,7 @@ #include "access/htup_details.h" #include "access/reloptions.h" #include "access/table.h" +#include "catalog/pg_am.h" #include "utils/fmgroids.h" #include "utils/syscache.h" @@ -59,3 +60,49 @@ CompressionIdToOid(CompressionId cmid) elog(ERROR, "Invalid compression method id %d", cmid); } } + +/* + * GetCompressionAmRoutineByAmId - look up the handler of the compression access + * method with the given OID, and get its CompressionAmRoutine struct. 
+ */ +CompressionAmRoutine * +GetCompressionAmRoutineByAmId(Oid amoid) +{ + HeapTuple tuple; + Form_pg_am amform; + regproc amhandler; + Datum datum; + CompressionAmRoutine *routine; + + /* Get handler function OID for the access method */ + tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(amoid)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for access method %u", + amoid); + + amform = (Form_pg_am)GETSTRUCT(tuple); + + /* Check if it's a compression access method as opposed to some other AM */ + if (amform->amtype != AMTYPE_COMPRESSION) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("access method \"%s\" is not of type %s", + NameStr(amform->amname), "COMPRESSION"))); + + amhandler = amform->amhandler; + + /* Complain if handler OID is invalid */ + if (!RegProcedureIsValid(amhandler)) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("compression access method \"%s\" does not have a handler", + NameStr(amform->amname)))); + + ReleaseSysCache(tuple); + + /* And finally, call the handler function to get the API struct */ + datum = OidFunctionCall0(amhandler); + routine = (CompressionAmRoutine *) DatumGetPointer(datum); + + return routine; +} diff --git a/src/backend/commands/amcmds.c b/src/backend/commands/amcmds.c index fa8deda08b..bdd6d0a8ac 100644 --- a/src/backend/commands/amcmds.c +++ b/src/backend/commands/amcmds.c @@ -227,6 +227,8 @@ get_am_type_string(char amtype) return "INDEX"; case AMTYPE_TABLE: return "TABLE"; + case AMTYPE_COMPRESSION: + return "COMPRESSION"; default: /* shouldn't happen */ elog(ERROR, "invalid access method type '%c'", amtype); @@ -264,6 +266,9 @@ lookup_am_handler_func(List *handler_name, char amtype) case AMTYPE_TABLE: expectedType = TABLE_AM_HANDLEROID; break; + case AMTYPE_COMPRESSION: + expectedType = COMPRESSION_AM_HANDLEROID; + break; default: elog(ERROR, "unrecognized access method type \"%c\"", amtype); } diff --git a/src/backend/executor/nodeModifyTable.c 
b/src/backend/executor/nodeModifyTable.c index 53a4e3131c..248b27f564 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -1969,7 +1969,7 @@ CompareCompressionMethodAndDecompress(TupleTableSlot *slot, * the target compression method is not same then we need to * decompress it. */ - cmoid = CompressionIdToOid(TOAST_COMPRESS_METHOD(new_value)); + cmoid = toast_get_compression_oid(new_value); if (!IsCompressionSupported(&targetTupDesc->attrs[i], cmoid)) { new_value = detoast_attr(new_value); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 22bd0e87a3..54ff6fb937 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -5254,6 +5254,7 @@ CreateAmStmt: CREATE ACCESS METHOD name TYPE_P am_type HANDLER handler_name am_type: INDEX { $$ = AMTYPE_INDEX; } | TABLE { $$ = AMTYPE_TABLE; } + | COMPRESSION { $$ = AMTYPE_COMPRESSION; } ; /***************************************************************************** diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index d6a36f7e97..ed8169a01f 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -5285,7 +5285,6 @@ Datum pg_column_compression(PG_FUNCTION_ARGS) { Datum value = PG_GETARG_DATUM(0); - char *compression; int typlen; struct varlena *varvalue; @@ -5315,10 +5314,8 @@ pg_column_compression(PG_FUNCTION_ARGS) varvalue = (struct varlena *) DatumGetPointer(value); - compression = - get_am_name(CompressionIdToOid(TOAST_COMPRESS_METHOD(varvalue))); - - PG_RETURN_TEXT_P(cstring_to_text(compression)); + PG_RETURN_TEXT_P(cstring_to_text(get_am_name( + toast_get_compression_oid(varvalue)))); } /* diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 201955fc3d..923dfa6451 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -13035,6 +13035,9 @@ dumpAccessMethod(Archive *fout, AccessMethodInfo *aminfo) case AMTYPE_TABLE: appendPQExpBufferStr(q, 
"TYPE TABLE "); break; + case AMTYPE_COMPRESSION: + appendPQExpBufferStr(q, "TYPE COMPRESSION "); + break; default: pg_log_warning("invalid type \"%c\" of access method \"%s\"", aminfo->amtype, qamname); diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index 8178a5124b..9bfa66ff09 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -883,6 +883,12 @@ static const SchemaQuery Query_for_list_of_statistics = { " WHERE substring(pg_catalog.quote_ident(amname),1,%d)='%s' AND "\ " amtype=" CppAsString2(AMTYPE_TABLE) +#define Query_for_list_of_compression_access_methods \ +" SELECT pg_catalog.quote_ident(amname) "\ +" FROM pg_catalog.pg_am "\ +" WHERE substring(pg_catalog.quote_ident(amname),1,%d)='%s' AND "\ +" amtype=" CppAsString2(AMTYPE_COMPRESSION) + /* the silly-looking length condition is just to eat up the current word */ #define Query_for_list_of_arguments \ "SELECT pg_catalog.oidvectortypes(proargtypes)||')' "\ diff --git a/src/include/access/compressamapi.h b/src/include/access/compressamapi.h index 8226ae0596..913a633d83 100644 --- a/src/include/access/compressamapi.h +++ b/src/include/access/compressamapi.h @@ -26,18 +26,25 @@ typedef enum CompressionId { PGLZ_COMPRESSION_ID = 0, - LZ4_COMPRESSION_ID = 1 + LZ4_COMPRESSION_ID = 1, + /* one free slot for the future built-in method */ + CUSTOM_COMPRESSION_ID = 3 } CompressionId; /* Use default compression method if it is not specified. 
*/ #define DefaultCompressionOid PGLZ_COMPRESSION_AM_OID +#define IsCustomCompression(cmid) ((cmid) == CUSTOM_COMPRESSION_ID) #define IsStorageCompressible(storage) ((storage) != TYPSTORAGE_PLAIN && \ (storage) != TYPSTORAGE_EXTERNAL) /* compression handler routines */ -typedef struct varlena *(*cmcompress_function) (const struct varlena *value); -typedef struct varlena *(*cmdecompress_function) (const struct varlena *value); +typedef struct varlena *(*cmcompress_function) (const struct varlena *value, + int32 toast_header_size); +typedef struct varlena *(*cmdecompress_function) (const struct varlena *value, + int32 toast_header_size); typedef struct varlena *(*cmdecompress_slice_function) - (const struct varlena *value, int32 slicelength); + (const struct varlena *value, + int32 toast_header_size, + int32 slicelength); /* * API struct for a compression AM. @@ -61,5 +68,6 @@ extern const CompressionAmRoutine lz4_compress_methods; /* access/compression/compressamapi.c */ extern CompressionId CompressionOidToId(Oid cmoid); extern Oid CompressionIdToOid(CompressionId cmid); +extern CompressionAmRoutine *GetCompressionAmRoutineByAmId(Oid amoid); #endif /* COMPRESSAMAPI_H */ diff --git a/src/include/access/detoast.h b/src/include/access/detoast.h index 86bad7e78c..e6b3ec90b5 100644 --- a/src/include/access/detoast.h +++ b/src/include/access/detoast.h @@ -89,4 +89,12 @@ extern Size toast_raw_datum_size(Datum value); */ extern Size toast_datum_size(Datum value); +/* ---------- + * toast_get_compression_oid - + * + * Return the compression method oid from the compressed value + * ---------- + */ +extern Oid toast_get_compression_oid(struct varlena *attr); + #endif /* DETOAST_H */ diff --git a/src/include/access/toast_internals.h b/src/include/access/toast_internals.h index e13e34cac3..81944912d2 100644 --- a/src/include/access/toast_internals.h +++ b/src/include/access/toast_internals.h @@ -27,6 +27,17 @@ typedef struct toast_compress_header * rawsize */ } 
toast_compress_header; +/* + * If a custom compression method is used, then the header also contains + * the Oid of the compression method + */ +typedef struct toast_compress_header_custom +{ + int32 vl_len_; /* varlena header (do not touch directly!) */ + uint32 info; /* 2 bits for compression method + rawsize */ + Oid cmoid; /* Oid of the compression method (pg_am) */ +} toast_compress_header_custom; + #define RAWSIZEMASK (0x3FFFFFFFU) /* @@ -38,6 +49,7 @@ typedef struct toast_compress_header * two highest bits. */ #define TOAST_COMPRESS_HDRSZ ((int32) sizeof(toast_compress_header)) +#define TOAST_CUSTOM_COMPRESS_HDRSZ ((int32)sizeof(toast_compress_header_custom)) #define TOAST_COMPRESS_RAWSIZE(ptr) (((toast_compress_header *) (ptr))->info & RAWSIZEMASK) #define TOAST_COMPRESS_METHOD(ptr) (((toast_compress_header *) (ptr))->info >> 30) #define TOAST_COMPRESS_SIZE(ptr) ((int32) VARSIZE_ANY(ptr) - TOAST_COMPRESS_HDRSZ) @@ -49,6 +61,9 @@ typedef struct toast_compress_header ((toast_compress_header *) (ptr))->info = ((len) | (cm_method) << 30); \ } while (0) +#define TOAST_COMPRESS_SET_CMOID(ptr, oid) \ + (((toast_compress_header_custom *)(ptr))->cmoid = (oid)) + extern Datum toast_compress_datum(Datum value, Oid cmoid); extern Oid toast_get_valid_index(Oid toastoid, LOCKMODE lock); diff --git a/src/test/regress/expected/compression.out b/src/test/regress/expected/compression.out index 9d87ec4ad7..82fd2d1a99 100644 --- a/src/test/regress/expected/compression.out +++ b/src/test/regress/expected/compression.out @@ -235,13 +235,51 @@ SELECT pg_column_compression(f1) FROM cmdata; pglz (2 rows) +-- create compression method +CREATE ACCESS METHOD pglz2 TYPE COMPRESSION HANDLER pglzhandler; +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION pglz2 PRESERVE ALL; +INSERT INTO cmdata VALUES (repeat('1234567890',1004)); +\d+ cmdata + Table "public.cmdata" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description 
+--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | pglz2 | | +Indexes: + "idx" btree (f1) + +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + lz4 + pglz + pglz2 +(3 rows) + +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4 PRESERVE (pglz2); +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + lz4 + lz4 + pglz2 +(3 rows) + +\d+ cmdata + Table "public.cmdata" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | lz4 | | +Indexes: + "idx" btree (f1) + -- check data is ok SELECT length(f1) FROM cmdata; length -------- 10000 10040 -(2 rows) + 10040 +(3 rows) SELECT length(f1) FROM cmdata1; length @@ -270,3 +308,4 @@ SELECT length(f1) FROM cmmove3; DROP MATERIALIZED VIEW mv; DROP TABLE cmdata, cmdata1, cmmove1, cmmove2, cmmove3, cmpart; +DROP ACCESS METHOD pglz2; diff --git a/src/test/regress/sql/compression.sql b/src/test/regress/sql/compression.sql index 9a9c3d95e7..8075fb7696 100644 --- a/src/test/regress/sql/compression.sql +++ b/src/test/regress/sql/compression.sql @@ -95,6 +95,16 @@ SELECT pg_column_compression(f1) FROM cmdata; ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4 PRESERVE ALL; SELECT pg_column_compression(f1) FROM cmdata; +-- create compression method +CREATE ACCESS METHOD pglz2 TYPE COMPRESSION HANDLER pglzhandler; +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION pglz2 PRESERVE ALL; +INSERT INTO cmdata VALUES (repeat('1234567890',1004)); +\d+ cmdata +SELECT pg_column_compression(f1) FROM cmdata; +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4 PRESERVE (pglz2); +SELECT pg_column_compression(f1) FROM cmdata; +\d+ cmdata + -- check data is ok SELECT length(f1) FROM cmdata; 
SELECT length(f1) FROM cmdata1; @@ -104,3 +114,4 @@ SELECT length(f1) FROM cmmove3; DROP MATERIALIZED VIEW mv; DROP TABLE cmdata, cmdata1, cmmove1, cmmove2, cmmove3, cmpart; +DROP ACCESS METHOD pglz2; -- 2.23.0