Re: [REVIEW] Re: Compression of full-page-writes - Mailing list pgsql-hackers
From: Andres Freund
Subject: Re: [REVIEW] Re: Compression of full-page-writes
Msg-id: 20140711063049.GH17261@awork2.anarazel.de
In response to: Re: [REVIEW] Re: Compression of full-page-writes (Rahila Syed <rahilasyed90@gmail.com>)
List: pgsql-hackers
On 2014-07-04 19:27:10 +0530, Rahila Syed wrote:
> +    /* Allocates memory for compressed backup blocks according to the compression
> +     * algorithm used.Once per session at the time of insertion of first XLOG
> +     * record.
> +     * This memory stays till the end of session. OOM is handled by making the
> +     * code proceed without FPW compression*/
> +    static char *compressed_pages[XLR_MAX_BKP_BLOCKS];
> +    static bool compressed_pages_allocated = false;
> +    if (compress_backup_block != BACKUP_BLOCK_COMPRESSION_OFF &&
> +        compressed_pages_allocated!= true)
> +    {
> +        size_t buffer_size = VARHDRSZ;
> +        int j;
> +        if (compress_backup_block == BACKUP_BLOCK_COMPRESSION_SNAPPY)
> +            buffer_size += snappy_max_compressed_length(BLCKSZ);
> +        else if (compress_backup_block == BACKUP_BLOCK_COMPRESSION_LZ4)
> +            buffer_size += LZ4_compressBound(BLCKSZ);
> +        else if (compress_backup_block == BACKUP_BLOCK_COMPRESSION_PGLZ)
> +            buffer_size += PGLZ_MAX_OUTPUT(BLCKSZ);
> +        for (j = 0; j < XLR_MAX_BKP_BLOCKS; j++)
> +        {   compressed_pages[j] = (char *) malloc(buffer_size);
> +            if(compressed_pages[j] == NULL)
> +            {
> +                compress_backup_block=BACKUP_BLOCK_COMPRESSION_OFF;
> +                break;
> +            }
> +        }
> +        compressed_pages_allocated = true;
> +    }

Why not do this in InitXLOGAccess() or similar?

>     /*
>      * Make additional rdata chain entries for the backup blocks, so that we
>      * don't need to special-case them in the write loop. This modifies the
> @@ -1015,11 +1048,32 @@ begin:;
>         rdt->next = &(dtbuf_rdt2[i]);
>         rdt = rdt->next;
>
> +        if (compress_backup_block != BACKUP_BLOCK_COMPRESSION_OFF)
> +        {
> +            /* Compress the backup block before including it in rdata chain */
> +            rdt->data = CompressBackupBlock(page, BLCKSZ - bkpb->hole_length,
> +                            compressed_pages[i], &(rdt->len));
> +            if (rdt->data != NULL)
> +            {
> +                /*
> +                 * write_len is the length of compressed block and its varlena
> +                 * header
> +                 */
> +                write_len += rdt->len;
> +                bkpb->hole_length = BLCKSZ - rdt->len;
> +                /*Adding information about compression in the backup block header*/
> +                bkpb->block_compression=compress_backup_block;
> +                rdt->next = NULL;
> +                continue;
> +            }
> +        }
> +

So, you're compressing backup blocks one by one. I wonder if that's the right idea and if we shouldn't instead compress all of them in one run to increase the compression ratio.
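A minimal sketch of that one-run idea: concatenate the hole-stripped block images of one record and call the compressor once. The helper name, MAX_BKP_BLOCKS and BLOCK_SIZE are made-up stand-ins for the patch's identifiers, and LZ4 is picked only as an example:

/*
 * Illustrative sketch only: compress all backup blocks of a record in a
 * single run instead of calling the compressor once per block.
 */
#include <string.h>
#include <lz4.h>

#define MAX_BKP_BLOCKS 4        /* stand-in for XLR_MAX_BKP_BLOCKS */
#define BLOCK_SIZE     8192     /* stand-in for BLCKSZ */

/*
 * images[]/lengths[] hold the hole-stripped backup block images of one
 * WAL record.  Returns the compressed length, or -1 if compression did
 * not reduce the size (the caller would then store the blocks
 * uncompressed).
 */
static int
compress_backup_blocks_together(char *const images[], const int lengths[],
                                int nblocks, char *dst, int dst_capacity)
{
    static char srcbuf[MAX_BKP_BLOCKS * BLOCK_SIZE];
    int         total = 0;
    int         compressed_len;
    int         i;

    /* Concatenate the block images so the compressor sees one input. */
    for (i = 0; i < nblocks; i++)
    {
        memcpy(srcbuf + total, images[i], lengths[i]);
        total += lengths[i];
    }

    /* One compression call over the whole run of backup blocks. */
    compressed_len = LZ4_compress_default(srcbuf, dst, total, dst_capacity);
    if (compressed_len <= 0 || compressed_len >= total)
        return -1;

    return compressed_len;
}

The per-block lengths would have to be stored alongside the compressed image so the individual blocks can be located again after a single LZ4_decompress_safe() call during recovery.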
> +/*
>  * Get a pointer to the right location in the WAL buffer containing the
>  * given XLogRecPtr.
>  *
> @@ -4061,6 +4174,50 @@ RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb, char *blk,
>     {
>         memcpy((char *) page, blk, BLCKSZ);
>     }
> +    /* Decompress if backup block is compressed*/
> +    else if (VARATT_IS_COMPRESSED((struct varlena *) blk)
> +        && bkpb.block_compression!=BACKUP_BLOCK_COMPRESSION_OFF)
> +    {
> +        if (bkpb.block_compression == BACKUP_BLOCK_COMPRESSION_SNAPPY)
> +        {
> +            int ret;
> +            size_t compressed_length = VARSIZE((struct varlena *) blk) - VARHDRSZ;
> +            char *compressed_data = (char *)VARDATA((struct varlena *) blk);
> +            size_t s_uncompressed_length;
> +
> +            ret = snappy_uncompressed_length(compressed_data,
> +                                             compressed_length,
> +                                             &s_uncompressed_length);
> +            if (!ret)
> +                elog(ERROR, "snappy: failed to determine compression length");
> +            if (BLCKSZ != s_uncompressed_length)
> +                elog(ERROR, "snappy: compression size mismatch %d != %zu",
> +                     BLCKSZ, s_uncompressed_length);
> +
> +            ret = snappy_uncompress(compressed_data,
> +                                    compressed_length,
> +                                    page);
> +            if (ret != 0)
> +                elog(ERROR, "snappy: decompression failed: %d", ret);
> +        }
> +        else if (bkpb.block_compression == BACKUP_BLOCK_COMPRESSION_LZ4)
> +        {
> +            int ret;
> +            size_t compressed_length = VARSIZE((struct varlena *) blk) - VARHDRSZ;
> +            char *compressed_data = (char *)VARDATA((struct varlena *) blk);
> +            ret = LZ4_decompress_fast(compressed_data, page,
> +                                      BLCKSZ);
> +            if (ret != compressed_length)
> +                elog(ERROR, "lz4: decompression size mismatch: %d vs %zu", ret,
> +                     compressed_length);
> +        }
> +        else if (bkpb.block_compression == BACKUP_BLOCK_COMPRESSION_PGLZ)
> +        {
> +            pglz_decompress((PGLZ_Header *) blk, (char *) page);
> +        }
> +        else
> +            elog(ERROR, "Wrong value for compress_backup_block GUC");
> +    }
>     else
>     {
>         memcpy((char *) page, blk, bkpb.hole_offset);

So why aren't we compressing the hole here instead of compressing the parts that the current logic deems to be filled with important information?

>     /*
>      * Options for enum values stored in other modules
>      */
> @@ -3498,6 +3512,16 @@ static struct config_enum ConfigureNamesEnum[] =
>         NULL, NULL, NULL
>     },
>
> +    {
> +        {"compress_backup_block", PGC_SIGHUP, WAL_SETTINGS,
> +            gettext_noop("Compress backup block in WAL using specified compression algorithm."),
> +            NULL
> +        },
> +        &compress_backup_block,
> +        BACKUP_BLOCK_COMPRESSION_OFF, backup_block_compression_options,
> +        NULL, NULL, NULL
> +    },
> +

This should be named 'compress_full_page_writes' or so, even if a temporary guc. There's the 'full_page_writes' guc and I see little reason to deviate from its name.

Greetings,

Andres Freund
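Concretely, the suggested rename would amount to no more than this sketch of the ConfigureNamesEnum entry, identical to the hunk quoted above except for the GUC name and the wording of the description:

    /*
     * Sketch only: same entry as in the patch, renamed to match the
     * existing full_page_writes GUC.
     */
    {
        {"compress_full_page_writes", PGC_SIGHUP, WAL_SETTINGS,
            gettext_noop("Compress full page writes in WAL using the specified compression algorithm."),
            NULL
        },
        &compress_backup_block,
        BACKUP_BLOCK_COMPRESSION_OFF, backup_block_compression_options,
        NULL, NULL, NULL
    },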