diff --git a/doc/src/sgml/ref/pg_verify_checksums.sgml b/doc/src/sgml/ref/pg_verify_checksums.sgml index 905b8f1222..a565cb52ae 100644 --- a/doc/src/sgml/ref/pg_verify_checksums.sgml +++ b/doc/src/sgml/ref/pg_verify_checksums.sgml @@ -16,7 +16,7 @@ PostgreSQL documentation pg_verify_checksums - verify data checksums in a PostgreSQL database cluster + enable, disable or verify data checksums in a PostgreSQL database cluster @@ -25,6 +25,11 @@ PostgreSQL documentation option + + + + + @@ -36,10 +41,18 @@ PostgreSQL documentation Description - pg_verify_checksums verifies data checksums in a - PostgreSQL cluster. The server must be shut - down cleanly before running pg_verify_checksums. - The exit status is zero if there are no checksum errors, otherwise nonzero. + pg_verify_checksums enables, disables or verifies data + checksums in a PostgreSQL cluster. The server + must be shut down cleanly before running + pg_verify_checksums. + The exit status is zero if there are no checksum errors or if + enabling/disabling checksums was successful, otherwise nonzero. + + + + While checking or enabling checksums requires scanning or rewriting every file in + the cluster, disabling only updates the pg_control + file. @@ -61,6 +74,36 @@ PostgreSQL documentation + + + + + Verify checksums. + + + + + + + + + + Disable checksums. + + + + + + + + + + Enable checksums. 
+ + + + + diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c index 511262ab5f..07cb5787c5 100644 --- a/src/bin/pg_verify_checksums/pg_verify_checksums.c +++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c @@ -1,11 +1,11 @@ /* - * pg_verify_checksums + * pg_checksums * - * Verifies page level checksums in an offline cluster + * Verifies/enables/disables page level checksums in an offline cluster * * Copyright (c) 2010-2019, PostgreSQL Global Development Group * - * src/bin/pg_verify_checksums/pg_verify_checksums.c + * src/bin/pg_checksums/pg_checksums.c */ #include "postgres_fe.h" @@ -13,15 +13,16 @@ #include #include +#include "access/xlog_internal.h" #include "catalog/pg_control.h" #include "common/controldata_utils.h" +#include "common/file_perm.h" +#include "common/file_utils.h" #include "getopt_long.h" #include "pg_getopt.h" #include "storage/bufpage.h" #include "storage/checksum.h" #include "storage/checksum_impl.h" -#include "storage/fd.h" - static int64 files = 0; static int64 blocks = 0; @@ -31,16 +32,33 @@ static ControlFileData *ControlFile; static char *only_relfilenode = NULL; static bool verbose = false; +typedef enum +{ + PG_ACTION_CHECK, + PG_ACTION_DISABLE, + PG_ACTION_ENABLE +} ChecksumAction; + +/* Filename components */ +#define PG_TEMP_FILES_DIR "pgsql_tmp" +#define PG_TEMP_FILE_PREFIX "pgsql_tmp" + +static ChecksumAction action = PG_ACTION_CHECK; + static const char *progname; static void usage(void) { - printf(_("%s verifies data checksums in a PostgreSQL database cluster.\n\n"), progname); + printf(_("%s enables/disables/verifies data checksums in a PostgreSQL database cluster.\n\n"), progname); printf(_("Usage:\n")); printf(_(" %s [OPTION]... 
[DATADIR]\n"), progname); printf(_("\nOptions:\n")); printf(_(" [-D, --pgdata=]DATADIR data directory\n")); + printf(_(" -c, --check check data checksums\n")); + printf(_(" -d, --disable disable data checksums\n")); + printf(_(" -e, --enable enable data checksums\n")); + printf(_(" \"check\", \"enable\" and \"disable\"\n")); printf(_(" -v, --verbose output verbose messages\n")); printf(_(" -r RELFILENODE check only relation with specified relfilenode\n")); printf(_(" -V, --version output version information, then exit\n")); @@ -80,6 +98,77 @@ skipfile(const char *fn) } static void +updateControlFile(char *DataDir, ControlFileData *ControlFile) +{ + int fd; + char buffer[PG_CONTROL_FILE_SIZE]; + char ControlFilePath[MAXPGPATH]; + + Assert(action == PG_ACTION_ENABLE || + action == PG_ACTION_DISABLE); + + /* + * For good luck, apply the same static assertions as in backend's + * WriteControlFile(). + */ +#if PG_VERSION_NUM >= 100000 + StaticAssertStmt(sizeof(ControlFileData) <= PG_CONTROL_MAX_SAFE_SIZE, + "pg_control is too large for atomic disk writes"); +#endif + StaticAssertStmt(sizeof(ControlFileData) <= PG_CONTROL_FILE_SIZE, + "sizeof(ControlFileData) exceeds PG_CONTROL_FILE_SIZE"); + + /* Recalculate CRC of control file */ + INIT_CRC32C(ControlFile->crc); + COMP_CRC32C(ControlFile->crc, + (char *) ControlFile, + offsetof(ControlFileData, crc)); + FIN_CRC32C(ControlFile->crc); + + /* + * Write out PG_CONTROL_FILE_SIZE bytes into pg_control by zero-padding + * the excess over sizeof(ControlFileData), to avoid premature EOF related + * errors when reading it. 
+ */ + memset(buffer, 0, PG_CONTROL_FILE_SIZE); + memcpy(buffer, ControlFile, sizeof(ControlFileData)); + + snprintf(ControlFilePath, sizeof(ControlFilePath), "%s/%s", DataDir, XLOG_CONTROL_FILE); + + fd = open(ControlFilePath, O_WRONLY | PG_BINARY, + pg_file_create_mode); + if (fd < 0) + { + fprintf(stderr, _("%s: could not open control file: %s\n"), + progname, strerror(errno)); + exit(1); + } + + errno = 0; + if (write(fd, buffer, PG_CONTROL_FILE_SIZE) != PG_CONTROL_FILE_SIZE) + { + /* if write didn't set errno, assume problem is no disk space */ + if (errno == 0) + errno = ENOSPC; + fprintf(stderr, _("%s: could not write control file: %s\n"), + progname, strerror(errno)); + exit(1); + } + + if (fsync(fd) != 0) + { + fprintf(stderr, _("%s: fsync error: %s\n"), progname, strerror(errno)); + exit(1); + } + + if (close(fd) < 0) + { + fprintf(stderr, _("%s: could not close control file: %s\n"), progname, strerror(errno)); + exit(1); + } +} + +static void scan_file(const char *fn, BlockNumber segmentno) { PGAlignedBlock buf; @@ -87,7 +176,11 @@ scan_file(const char *fn, BlockNumber segmentno) int f; BlockNumber blockno; - f = open(fn, O_RDONLY | PG_BINARY, 0); + Assert(action == PG_ACTION_ENABLE || + action == PG_ACTION_CHECK); + + f = open(fn, (action == PG_ACTION_ENABLE ? 
O_RDWR : O_RDONLY) | PG_BINARY, 0); + if (f < 0) { fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), @@ -117,18 +210,47 @@ scan_file(const char *fn, BlockNumber segmentno) continue; csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE); - if (csum != header->pd_checksum) + if (action == PG_ACTION_CHECK) { - if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION) - fprintf(stderr, _("%s: checksum verification failed in file \"%s\", block %u: calculated checksum %X but block contains %X\n"), - progname, fn, blockno, csum, header->pd_checksum); - badblocks++; + if (csum != header->pd_checksum) + { + if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION) + fprintf(stderr, _("%s: checksum verification failed in file \"%s\", block %u: calculated checksum %X but block contains %X\n"), + progname, fn, blockno, csum, header->pd_checksum); + badblocks++; + } + } + else if (action == PG_ACTION_ENABLE) + { + /* Store the freshly computed checksum in the page header */ + header->pd_checksum = csum; + + /* Seek back to the beginning of the block just read */ + if (lseek(f, -BLCKSZ, SEEK_CUR) < 0) + { + fprintf(stderr, _("%s: seek failed for block %u in file \"%s\": %s\n"), progname, blockno, fn, strerror(errno)); + exit(1); + } + + /* Overwrite the block in place, now carrying its checksum */ + if (write(f, buf.data, BLCKSZ) != BLCKSZ) + { + fprintf(stderr, _("%s: could not update checksum of block %u in file \"%s\": %s\n"), + progname, blockno, fn, strerror(errno)); + exit(1); + } } } if (verbose) - fprintf(stderr, - _("%s: checksums verified in file \"%s\"\n"), progname, fn); + { + if (action == PG_ACTION_CHECK) + fprintf(stderr, + _("%s: checksums verified in file \"%s\"\n"), progname, fn); + if (action == PG_ACTION_ENABLE) + fprintf(stderr, + _("%s: checksums enabled in file \"%s\"\n"), progname, fn); + } close(f); } @@ -230,17 +352,22 @@ int main(int argc, char *argv[]) { static struct option long_options[] = { + {"check", no_argument, NULL, 'c'}, {"pgdata", required_argument, NULL, 'D'}, + {"disable", 
no_argument, NULL, 'd'}, + {"enable", no_argument, NULL, 'e'}, {"verbose", no_argument, NULL, 'v'}, {NULL, 0, NULL, 0} }; char *DataDir = NULL; + char pid_file[MAXPGPATH]; int c; int option_index; + int pidf; bool crc_ok; - set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_verify_checksums")); + set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_checksums")); progname = get_progname(argv[0]); @@ -253,15 +380,24 @@ main(int argc, char *argv[]) } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { - puts("pg_verify_checksums (PostgreSQL) " PG_VERSION); + puts("pg_checksums (PostgreSQL) " PG_VERSION); exit(0); } } - while ((c = getopt_long(argc, argv, "D:r:v", long_options, &option_index)) != -1) + while ((c = getopt_long(argc, argv, "cD:der:v", long_options, &option_index)) != -1) { switch (c) { + case 'c': + action = PG_ACTION_CHECK; + break; + case 'd': + action = PG_ACTION_DISABLE; + break; + case 'e': + action = PG_ACTION_ENABLE; + break; case 'v': verbose = true; break; @@ -308,6 +444,16 @@ main(int argc, char *argv[]) exit(1); } + /* Relfilenode checking only works in check mode */ + if (action != PG_ACTION_CHECK && + only_relfilenode) + { + fprintf(stderr, _("%s: relfilenode option only possible with check action\n"), progname); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); + exit(1); + } + /* Check if cluster is running */ ControlFile = get_controlfile(DataDir, progname, &crc_ok); if (!crc_ok) @@ -319,29 +465,85 @@ main(int argc, char *argv[]) if (ControlFile->state != DB_SHUTDOWNED && ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY) { - fprintf(stderr, _("%s: cluster must be shut down to verify checksums\n"), progname); + fprintf(stderr, _("%s: cluster must be shut down\n"), progname); + exit(1); + } + + /* Also check for postmaster.pid file */ + snprintf(pid_file, sizeof(pid_file), "%s/postmaster.pid", DataDir); + pidf = open(pid_file, O_RDONLY, 0); + if (pidf < 0) + { + /* + * if the errno is ENOENT, there 
is no pid file which is what we + * expect. Otherwise, it exists but we cannot open it so exit with + * failure. + */ + if (errno != ENOENT) + { + fprintf(stderr, _("%s: postmaster.pid cannot be opened for reading: %s\n"), + progname, strerror(errno)); + exit(1); + } + } + else + { + fprintf(stderr, _("%s: postmaster.pid exists, cluster must be shut down\n"), progname); exit(1); } - if (ControlFile->data_checksum_version == 0) + if (ControlFile->data_checksum_version == 0 && + action == PG_ACTION_CHECK) { fprintf(stderr, _("%s: data checksums are not enabled in cluster\n"), progname); exit(1); } + if (ControlFile->data_checksum_version == 0 && + action == PG_ACTION_DISABLE) + { + fprintf(stderr, _("%s: data checksums are already disabled in cluster.\n"), progname); + exit(1); + } + if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION && + action == PG_ACTION_ENABLE) + { + fprintf(stderr, _("%s: data checksums are already enabled in cluster.\n"), progname); + exit(1); + } - /* Scan all files */ - scan_directory(DataDir, "global"); - scan_directory(DataDir, "base"); - scan_directory(DataDir, "pg_tblspc"); + if (action == PG_ACTION_CHECK || action == PG_ACTION_ENABLE) + { + /* Operate on all files */ + scan_directory(DataDir, "global"); + scan_directory(DataDir, "base"); + scan_directory(DataDir, "pg_tblspc"); + + printf(_("Checksum operation completed\n")); + printf(_("Files scanned: %s\n"), psprintf(INT64_FORMAT, files)); + printf(_("Blocks scanned: %s\n"), psprintf(INT64_FORMAT, blocks)); + if (action == PG_ACTION_CHECK) + { + printf(_("Bad checksums: %s\n"), psprintf(INT64_FORMAT, badblocks)); + printf(_("Data checksum version: %d\n"), ControlFile->data_checksum_version); - printf(_("Checksum scan completed\n")); - printf(_("Data checksum version: %d\n"), ControlFile->data_checksum_version); - printf(_("Files scanned: %s\n"), psprintf(INT64_FORMAT, files)); - printf(_("Blocks scanned: %s\n"), psprintf(INT64_FORMAT, blocks)); - printf(_("Bad checksums: 
%s\n"), psprintf(INT64_FORMAT, badblocks)); + if (badblocks > 0) + return 1; + } + } - if (badblocks > 0) - return 1; + if (action == PG_ACTION_ENABLE || action == PG_ACTION_DISABLE) + { + /* Flush data files to disk before pg_control records the new checksum state */ + ControlFile->data_checksum_version = action == PG_ACTION_ENABLE ? PG_DATA_CHECKSUM_VERSION : 0; + fsync_pgdata(DataDir, progname, PG_VERSION_NUM); + updateControlFile(DataDir, ControlFile); + if (verbose) + printf(_("Data checksum version: %d\n"), ControlFile->data_checksum_version); + if (action == PG_ACTION_ENABLE) + printf(_("Checksums enabled in cluster\n")); + else + printf(_("Checksums disabled in cluster\n")); + } return 0; } diff --git a/src/bin/pg_verify_checksums/t/002_actions.pl b/src/bin/pg_verify_checksums/t/002_actions.pl index 5250b5a728..af20c60445 100644 --- a/src/bin/pg_verify_checksums/t/002_actions.pl +++ b/src/bin/pg_verify_checksums/t/002_actions.pl @@ -5,7 +5,7 @@ use strict; use warnings; use PostgresNode; use TestLib; -use Test::More tests => 45; +use Test::More tests => 59; # Utility routine to create and check a table with corrupted checksums @@ -38,8 +38,8 @@ sub check_relation_corruption # Checksums are correct for single relfilenode as the table is not # corrupted yet. 
- command_ok(['pg_verify_checksums', '-D', $pgdata, - '-r', $relfilenode_corrupted], + command_ok(['pg_verify_checksums', '-c', '-D', $pgdata, '-r', + $relfilenode_corrupted], "succeeds for single relfilenode on tablespace $tablespace with offline cluster"); # Time to create some corruption @@ -49,15 +49,15 @@ sub check_relation_corruption close $file; # Checksum checks on single relfilenode fail - $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata, '-r', - $relfilenode_corrupted], + $node->command_checks_all([ 'pg_verify_checksums', '-c', '-D', $pgdata, + '-r', $relfilenode_corrupted], 1, [qr/Bad checksums:.*1/], [qr/checksum verification failed/], "fails with corrupted data for single relfilenode on tablespace $tablespace"); # Global checksum checks fail as well - $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata], + $node->command_checks_all([ 'pg_verify_checksums', '-c', '-D', $pgdata], 1, [qr/Bad checksums:.*1/], [qr/checksum verification failed/], @@ -67,22 +67,22 @@ sub check_relation_corruption $node->start; $node->safe_psql('postgres', "DROP TABLE $table;"); $node->stop; - $node->command_ok(['pg_verify_checksums', '-D', $pgdata], + $node->command_ok(['pg_verify_checksums', '-c', '-D', $pgdata], "succeeds again after table drop on tablespace $tablespace"); $node->start; return; } -# Initialize node with checksums enabled. +# Initialize node with checksums disabled. my $node = get_new_node('node_checksum'); -$node->init(extra => ['--data-checksums']); +$node->init(); my $pgdata = $node->data_dir; -# Control file should know that checksums are enabled. +# Control file should know that checksums are disabled. command_like(['pg_controldata', $pgdata], - qr/Data page checksum version:.*1/, - 'checksums enabled in control file'); + qr/Data page checksum version:.*0/, + 'checksums disabled in control file'); # These are correct but empty files, so they should pass through. 
append_to_file "$pgdata/global/99999", ""; @@ -100,13 +100,49 @@ append_to_file "$pgdata/global/pgsql_tmp_123", "foo"; mkdir "$pgdata/global/pgsql_tmp"; append_to_file "$pgdata/global/pgsql_tmp/1.1", "foo"; +# Enable checksums +command_ok(['pg_verify_checksums', '-e', '-D', $pgdata], + "checksums successfully enabled in cluster"); + +# Control file should know that checksums are enabled. +command_like(['pg_controldata', $pgdata], + qr/Data page checksum version:.*1/, + 'checksums enabled in control file'); + +# Disable checksums again +command_ok(['pg_verify_checksums', '-d', '-D', $pgdata], + "checksums successfully disabled in cluster"); + +# Control file should know that checksums are disabled. +command_like(['pg_controldata', $pgdata], + qr/Data page checksum version:.*0/, + 'checksums disabled in control file'); + +# Enable checksums again with long option +command_ok(['pg_verify_checksums', '--enable', '-D', $pgdata], + "checksums successfully enabled in cluster"); + +# Control file should know that checksums are enabled. +command_like(['pg_controldata', $pgdata], + qr/Data page checksum version:.*1/, + 'checksums enabled in control file'); + # Checksums pass on a newly-created cluster -command_ok(['pg_verify_checksums', '-D', $pgdata], +command_ok(['pg_verify_checksums', '-c', '-D', $pgdata], "succeeds with offline cluster"); +# Checksums are verified if no other arguments are specified +command_ok(['pg_verify_checksums', '-D', $pgdata], + "verifies checksums as default action"); + +# Specific relation files cannot be requested when action is disable +command_fails(['pg_verify_checksums', '-d', '-r', '1234', '-D', + $pgdata], + "fails when relfilnodes are requested and action is not verify"); + # Checks cannot happen with an online cluster $node->start; -command_fails(['pg_verify_checksums', '-D', $pgdata], +command_fails(['pg_verify_checksums', '-c', '-D', $pgdata], "fails with online cluster"); # Check corruption of table on default tablespace. 
@@ -133,7 +169,7 @@ sub fail_corrupt my $file_name = "$pgdata/global/$file"; append_to_file $file_name, "foo"; - $node->command_checks_all([ 'pg_verify_checksums', '-D', $pgdata], + $node->command_checks_all([ 'pg_verify_checksums', '-c', '-D', $pgdata], 1, [qr/^$/], [qr/could not read block 0 in file.*$file\":/],