From 1a72a37219abdd764d74859e941cbc4dbef559e0 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Fri, 20 Mar 2020 18:01:52 -0400 Subject: [PATCH v12 2/2] Generate backup manifests for base backups; and validate them. A manifest is a JSON document which includes the file name, size, last modification time, and a checksum for each file backed up, as well as a checksum for the manifest itself. By default, we use CRC-32C for the checksum algorithm, because we are trying to detect corruption and user error, not foil an adversary. However, pg_basebackup and the server-side BASE_BACKUP command now have options to select the checksum algorithm, so users wanting a cryptographic hash function can select SHA-224, SHA-256, SHA-384, or SHA-512. Users not wanting any checksums at all can disable them, or disable generating of the backup manifest altogether. Using a cryptographic hash function in place of CRC-32C consumes significantly more CPU cycles, which may slow down backups in some cases. A new tool called pg_validatebackup can validate a backup against the manifest. If no checksums are present, it can still check that the right files exist and that they have the expected sizes. If checksums are present, it can also verify that each file has the expected checksum. Only plain format backups can be validated directly, but tar format backups can be validated after extracting them. Robert Haas, with help, ideas, review, and testing from David Steele, Stephen Frost, Andrew Dunstan, Rushabh Lathia, Suraj Kharage, Tushar Ahuja, Rajkumar Raghuwanshi, Mark Dilger, Davinder Singh, and Jeevan Chalke. 
Discussion: http://postgr.es/m/CA+TgmoZV8dw1H2bzZ9xkKwdrk8+XYa+DC9H=F7heO2zna5T6qg@mail.gmail.com --- doc/src/sgml/protocol.sgml | 33 +- doc/src/sgml/ref/allfiles.sgml | 1 + doc/src/sgml/ref/pg_basebackup.sgml | 63 ++ doc/src/sgml/ref/pg_validatebackup.sgml | 232 ++++++ doc/src/sgml/reference.sgml | 1 + src/backend/access/transam/xlog.c | 3 +- src/backend/replication/basebackup.c | 430 +++++++++- src/backend/replication/repl_gram.y | 13 + src/backend/replication/repl_scanner.l | 2 + src/backend/replication/walsender.c | 30 + src/bin/Makefile | 1 + src/bin/pg_basebackup/pg_basebackup.c | 184 ++++- src/bin/pg_basebackup/t/010_pg_basebackup.pl | 8 +- src/bin/pg_validatebackup/.gitignore | 2 + src/bin/pg_validatebackup/Makefile | 39 + src/bin/pg_validatebackup/parse_manifest.c | 575 ++++++++++++++ src/bin/pg_validatebackup/parse_manifest.h | 40 + src/bin/pg_validatebackup/pg_validatebackup.c | 734 ++++++++++++++++++ src/bin/pg_validatebackup/t/001_basic.pl | 30 + src/bin/pg_validatebackup/t/002_algorithm.pl | 58 ++ src/bin/pg_validatebackup/t/003_corruption.pl | 244 ++++++ src/bin/pg_validatebackup/t/004_options.pl | 89 +++ .../pg_validatebackup/t/005_bad_manifest.pl | 158 ++++ src/bin/pg_validatebackup/t/006_encoding.pl | 27 + src/include/replication/basebackup.h | 7 +- src/include/replication/walsender.h | 1 + 26 files changed, 2973 insertions(+), 32 deletions(-) create mode 100644 doc/src/sgml/ref/pg_validatebackup.sgml create mode 100644 src/bin/pg_validatebackup/.gitignore create mode 100644 src/bin/pg_validatebackup/Makefile create mode 100644 src/bin/pg_validatebackup/parse_manifest.c create mode 100644 src/bin/pg_validatebackup/parse_manifest.h create mode 100644 src/bin/pg_validatebackup/pg_validatebackup.c create mode 100644 src/bin/pg_validatebackup/t/001_basic.pl create mode 100644 src/bin/pg_validatebackup/t/002_algorithm.pl create mode 100644 src/bin/pg_validatebackup/t/003_corruption.pl create mode 100644 src/bin/pg_validatebackup/t/004_options.pl create 
mode 100644 src/bin/pg_validatebackup/t/005_bad_manifest.pl create mode 100644 src/bin/pg_validatebackup/t/006_encoding.pl diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index f139ba0231..d1ff53e8e8 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -2466,7 +2466,7 @@ The commands accepted in replication mode are: - BASE_BACKUP [ LABEL 'label' ] [ PROGRESS ] [ FAST ] [ WAL ] [ NOWAIT ] [ MAX_RATE rate ] [ TABLESPACE_MAP ] [ NOVERIFY_CHECKSUMS ] + BASE_BACKUP [ LABEL 'label' ] [ PROGRESS ] [ FAST ] [ WAL ] [ NOWAIT ] [ MAX_RATE rate ] [ TABLESPACE_MAP ] [ NOVERIFY_CHECKSUMS ] [ MANIFEST manifest_option ] [ MANIFEST_CHECKSUMS checksum_algorithm ] BASE_BACKUP @@ -2576,6 +2576,37 @@ The commands accepted in replication mode are: + + + MANIFEST + + + When this option is specified with a value of yes + or force-encode, a backup manifest is created + and sent along with the backup. The latter value forces all filenames + to be hex-encoded; otherwise, this type of encoding is performed only + for files whose names are non-UTF8 octet sequences. + force-encode is intended primarily for testing + purposes, to be sure that clients which read the backup manifest + can handle this case. For compatibility with previous releases, + the default is MANIFEST 'no'. + + + + + + MANIFEST_CHECKSUMS + + + Specifies the algorithm that should be used to checksum each file + for purposes of the backup manifest. Currently, the available + algorithms are NONE, CRC32C, + SHA224, SHA256, + SHA384, and SHA512. + The default is CRC32C. + + + diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml index 8d91f3529e..ab71176cdf 100644 --- a/doc/src/sgml/ref/allfiles.sgml +++ b/doc/src/sgml/ref/allfiles.sgml @@ -211,6 +211,7 @@ Complete list of usable sgml source files in this directory. 
+ diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index 90638aad0e..bf6963a595 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -561,6 +561,69 @@ PostgreSQL documentation + + + + + + Disables generation of a backup manifest. If this option is not + specified, the server will generate and send a backup manifest + which can be verified using . + + + + + + + + + Forces all filenames in the backup manifest to be hex-encoded. + If this option is not specified, only non-UTF8 filenames are + hex-encoded. This option is mostly intended to test that tools which + read a backup manifest file properly handle this case. + + + + + + + + + Specifies the algorithm that should be used to checksum each file + for purposes of the backup manifest. Currently, the available + algorithms are NONE, CRC32C, + SHA224, SHA256, + SHA384, and SHA512. + The default is CRC32C. + + + If NONE is selected, the backup manifest will + not contain any checksums. Otherwise, it will contain a checksum + of each file in the backup using the specified algorithm. In addition, + the manifest itself will always contain a SHA256 + checksum of its own contents. The SHA algorithms + are significantly more CPU-intensive than CRC32C, + so selecting one of them may increase the time required to complete + the backup. + + + On the other hand, CRC32C is not a cryptographic + hash function, so it is only suitable for protecting against + inadvertent or random modifications to a backup. An adversary + who can modify the backup could easily do so in such a way that + the CRC does not change, whereas a SHA collision will be hard + to manufacture. (However, note that if the attacker also has access + to modify the backup manifest itself, no checksum algorithm will + provide any protection.) An additional advantage of the + SHA family of functions is that they output + a much larger number of bits. 
+ + + can be used to check the + integrity of a backup against the backup manifest. + + + diff --git a/doc/src/sgml/ref/pg_validatebackup.sgml b/doc/src/sgml/ref/pg_validatebackup.sgml new file mode 100644 index 0000000000..1c171f6970 --- /dev/null +++ b/doc/src/sgml/ref/pg_validatebackup.sgml @@ -0,0 +1,232 @@ + + + + + pg_validatebackup + + + + pg_validatebackup + 1 + Application + + + + pg_validatebackup + verify the integrity of a base backup of a + PostgreSQL cluster + + + + + pg_validatebackup + option + + + + + + Description + + + pg_validatebackup is used to check the integrity + of a database cluster backup. The backup being checked should have been + created by pg_basebackup or some other tool that includes + a backup_manifest file with the backup. The backup + must be stored in the "plain" format; a "tar" format backup can be checked + after extracting it. Backup manifests are created by the server beginning + with PostgreSQL version 13, so older backups + cannot be validated using this tool. + + + + pg_validatebackup reads the manifest file of a + backup, verifies the manifest against its own internal checksum, and then + verifies that the same files are present in the target directory as in the + manifest itself. It then verifies that each file has the expected checksum, + unless the backup was taken with the checksum algorithm set to + none, in which case checksum verification is not + performed. The presence or absence of directories is not checked, except + indirectly: if a directory is missing, any files it should have contained + will necessarily also be missing. Certain files and directories are + excluded from verification: + + + + + + backup_manifest is ignored because the backup + manifest is logically not part of the backup and does not include + any entry for itself. + + + + + + pg_wal is ignored because WAL files are sent + separately from the backup, and are therefore not described by the + backup manifest. 
+ + + + + + postgresql.auto.conf, + standby.signal, + and recovery.signal are ignored because they may + sometimes be created or modified by the backup client itself. + (For example, pg_basebackup -R will modify + postgresql.auto.conf and create + standby.signal.) + + + + + + + Options + + + The following command-line options control the behavior. + + + + + + + + Exit as soon as a problem with the backup is detected. If this option + is not specified, pg_validatebackup will continue + checking the backup even after a problem has been detected, and will + report all problems detected as errors. + + + + + + + + + + Ignore the specified file or directory, which should be expressed + as a relative pathname. If the backup contains extra files, is + missing files, or has files that have been modified as compared with + what is described in the manifest, this option can be used to suppress + the errors that would otherwise occur. If a directory is specified, + this option affects the entire subtree rooted at that location. + + + + + + + + + + Use the manifest file at the specified path, rather than one located + in the root of the backup directory. + + + + + + + + + + Don't print anything when a backup is successfully validated. + + + + + + + + + + Do not validate checksums. The presence or absence of files and the + sizes of those files will still be checked. This is much faster, + because the files themselves do not need to be read. + + + + + + + + Other options are also available: + + + + + + + + Print the pg_validatebackup version and exit. + + + + + + + + + + Show help about pg_validatebackup command + line arguments, and exit. 
+ + + + + + + + + + + Examples + + + To create a base backup of the server at mydbserver and + validate the integrity of the backup: + +$ pg_basebackup -h mydbserver -D /usr/local/pgsql/data +$ pg_validatebackup /usr/local/pgsql/data + + + + + To create a base backup of the server at mydbserver, move + the manifest somewhere outside the backup directory, and validate the + backup: + +$ pg_basebackup -h mydbserver -D /usr/local/pgsql/backup1234 +$ mv /usr/local/pgsql/backup1234/backup_manifest /my/secure/location/backup_manifest.1234 +$ pg_validatebackup -m /my/secure/location/backup_manifest.1234 /usr/local/pgsql/backup1234 + + + + + To validate a backup while ignoring a file that was added manually to the + backup directory, and also skipping checksum verification: + +$ pg_basebackup -h mydbserver -D /usr/local/pgsql/data +$ edit /usr/local/pgsql/data/note.to.self +$ pg_validatebackup --ignore=note.to.self --skip-checksums /usr/local/pgsql/data + + + + + + + See Also + + + + + + + diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml index cef09dd38b..d25a77b13c 100644 --- a/doc/src/sgml/reference.sgml +++ b/doc/src/sgml/reference.sgml @@ -255,6 +255,7 @@ &pgReceivewal; &pgRecvlogical; &pgRestore; + &pgValidateBackup; &psqlRef; &reindexdb; &vacuumdb; diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 793c076da6..b3917bc526 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -10551,7 +10551,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, ti->oid = pstrdup(de->d_name); ti->path = pstrdup(buflinkpath.data); ti->rpath = relpath ? pstrdup(relpath) : NULL; - ti->size = infotbssize ? sendTablespace(fullpath, true) : -1; + ti->size = infotbssize ? 
+ sendTablespace(fullpath, ti->oid, true, NULL) : -1; if (tablespaces) *tablespaces = lappend(*tablespaces, ti); diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 806d013108..6dffc6ef5b 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -18,6 +18,7 @@ #include "access/xlog_internal.h" /* for pg_start/stop_backup */ #include "catalog/pg_type.h" +#include "common/checksum_helper.h" #include "common/file_perm.h" #include "commands/progress.h" #include "lib/stringinfo.h" @@ -32,6 +33,7 @@ #include "replication/basebackup.h" #include "replication/walsender.h" #include "replication/walsender_private.h" +#include "storage/buffile.h" #include "storage/bufpage.h" #include "storage/checksum.h" #include "storage/dsm_impl.h" @@ -39,10 +41,19 @@ #include "storage/ipc.h" #include "storage/reinit.h" #include "utils/builtins.h" +#include "utils/json.h" #include "utils/ps_status.h" #include "utils/relcache.h" +#include "utils/resowner.h" #include "utils/timestamp.h" +typedef enum manifest_option +{ + MANIFEST_OPTION_YES, + MANIFEST_OPTION_NO, + MANIFEST_OPTION_FORCE_ENCODE +} manifest_option; + typedef struct { const char *label; @@ -52,20 +63,43 @@ typedef struct bool includewal; uint32 maxrate; bool sendtblspcmapfile; + manifest_option manifest; + pg_checksum_type manifest_checksum_type; } basebackup_options; +struct manifest_info +{ + BufFile *buffile; + pg_checksum_type checksum_type; + pg_sha256_ctx manifest_ctx; + uint64 manifest_size; + bool force_encode; + bool first_file; + bool still_checksumming; +}; + static int64 sendDir(const char *path, int basepathlen, bool sizeonly, - List *tablespaces, bool sendtblspclinks); + List *tablespaces, bool sendtblspclinks, + manifest_info *manifest, const char *spcoid); static bool sendFile(const char *readfilename, const char *tarfilename, - struct stat *statbuf, bool missing_ok, Oid dboid); -static void sendFileWithContent(const char *filename, 
const char *content); + struct stat *statbuf, bool missing_ok, Oid dboid, + manifest_info *manifest, const char *spcoid); +static void sendFileWithContent(const char *filename, const char *content, + manifest_info *manifest); static int64 _tarWriteHeader(const char *filename, const char *linktarget, struct stat *statbuf, bool sizeonly); static int64 _tarWriteDir(const char *pathbuf, int basepathlen, struct stat *statbuf, bool sizeonly); static void send_int8_string(StringInfoData *buf, int64 intval); static void SendBackupHeader(List *tablespaces); +static void InitializeManifest(manifest_info *manifest, + basebackup_options *opt); +static void AppendStringToManifest(manifest_info *manifest, char *s); +static void AddFileToManifest(manifest_info *manifest, const char *spcoid, + const char *pathname, size_t size, time_t mtime, + pg_checksum_context *checksum_ctx); +static void SendBackupManifest(manifest_info *manifest); static void perform_base_backup(basebackup_options *opt); static void parse_basebackup_options(List *options, basebackup_options *opt); static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli); @@ -102,6 +136,16 @@ do { \ (errmsg("could not read from file \"%s\"", filename))); \ } while (0) +/* + * Convenience macro for appending data to the backup manifest. + */ +#define AppendToManifest(manifest, ...) \ + { \ + char *_manifest_s = psprintf(__VA_ARGS__); \ + AppendStringToManifest(manifest, _manifest_s); \ + pfree(_manifest_s); \ + } + /* The actual number of bytes, transfer of which may cause sleep. 
*/ static uint64 throttling_sample; @@ -251,6 +295,7 @@ perform_base_backup(basebackup_options *opt) TimeLineID endtli; StringInfo labelfile; StringInfo tblspc_map_file = NULL; + manifest_info manifest; int datadirpathlen; List *tablespaces = NIL; @@ -258,12 +303,17 @@ perform_base_backup(basebackup_options *opt) backup_streamed = 0; pgstat_progress_start_command(PROGRESS_COMMAND_BASEBACKUP, InvalidOid); + /* we're going to use a BufFile, so we need a ResourceOwner */ + Assert(CurrentResourceOwner == NULL); + CurrentResourceOwner = ResourceOwnerCreate(NULL, "base backup"); + datadirpathlen = strlen(DataDir); backup_started_in_recovery = RecoveryInProgress(); labelfile = makeStringInfo(); tblspc_map_file = makeStringInfo(); + InitializeManifest(&manifest, opt); total_checksum_failures = 0; @@ -301,7 +351,10 @@ perform_base_backup(basebackup_options *opt) /* Add a node for the base directory at the end */ ti = palloc0(sizeof(tablespaceinfo)); - ti->size = opt->progress ? sendDir(".", 1, true, tablespaces, true) : -1; + if (opt->progress) + ti->size = sendDir(".", 1, true, tablespaces, true, NULL, NULL); + else + ti->size = -1; tablespaces = lappend(tablespaces, ti); /* @@ -380,7 +433,8 @@ perform_base_backup(basebackup_options *opt) struct stat statbuf; /* In the main tar, include the backup_label first... 
*/ - sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data); + sendFileWithContent(BACKUP_LABEL_FILE, labelfile->data, + &manifest); /* * Send tablespace_map file if required and then the bulk of @@ -388,11 +442,14 @@ perform_base_backup(basebackup_options *opt) */ if (tblspc_map_file && opt->sendtblspcmapfile) { - sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data); - sendDir(".", 1, false, tablespaces, false); + sendFileWithContent(TABLESPACE_MAP, tblspc_map_file->data, + &manifest); + sendDir(".", 1, false, tablespaces, false, + &manifest, NULL); } else - sendDir(".", 1, false, tablespaces, true); + sendDir(".", 1, false, tablespaces, true, + &manifest, NULL); /* ... and pg_control after everything else. */ if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0) @@ -400,10 +457,11 @@ perform_base_backup(basebackup_options *opt) (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", XLOG_CONTROL_FILE))); - sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false, InvalidOid); + sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, + false, InvalidOid, &manifest, NULL); } else - sendTablespace(ti->path, false); + sendTablespace(ti->path, ti->oid, false, &manifest); /* * If we're including WAL, and this is the main data directory we @@ -632,7 +690,7 @@ perform_base_backup(basebackup_options *opt) * complete segment. 
*/ StatusFilePath(pathbuf, walFileName, ".done"); - sendFileWithContent(pathbuf, ""); + sendFileWithContent(pathbuf, "", &manifest); } /* @@ -655,16 +713,20 @@ perform_base_backup(basebackup_options *opt) (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", pathbuf))); - sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid); + sendFile(pathbuf, pathbuf, &statbuf, false, InvalidOid, + &manifest, NULL); /* unconditionally mark file as archived */ StatusFilePath(pathbuf, fname, ".done"); - sendFileWithContent(pathbuf, ""); + sendFileWithContent(pathbuf, "", &manifest); } /* Send CopyDone message for the last tar file */ pq_putemptymessage('c'); } + + SendBackupManifest(&manifest); + SendXlogRecPtrResult(endptr, endtli); if (total_checksum_failures) @@ -678,6 +740,9 @@ perform_base_backup(basebackup_options *opt) errmsg("checksum verification failure during base backup"))); } + /* clean up the resource owner we created */ + WalSndResourceCleanup(true); + pgstat_progress_end_command(); } @@ -709,8 +774,13 @@ parse_basebackup_options(List *options, basebackup_options *opt) bool o_maxrate = false; bool o_tablespace_map = false; bool o_noverify_checksums = false; + bool o_manifest = false; + bool o_manifest_checksums = false; MemSet(opt, 0, sizeof(*opt)); + opt->manifest = MANIFEST_OPTION_NO; + opt->manifest_checksum_type = CHECKSUM_TYPE_CRC32C; + foreach(lopt, options) { DefElem *defel = (DefElem *) lfirst(lopt); @@ -797,12 +867,61 @@ parse_basebackup_options(List *options, basebackup_options *opt) noverify_checksums = true; o_noverify_checksums = true; } + else if (strcmp(defel->defname, "manifest") == 0) + { + char *optval = strVal(defel->arg); + bool manifest_bool; + + if (o_manifest) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("duplicate option \"%s\"", defel->defname))); + if (parse_bool(optval, &manifest_bool)) + { + if (manifest_bool) + opt->manifest = MANIFEST_OPTION_YES; + else + opt->manifest = MANIFEST_OPTION_NO; + } + else if 
(pg_strcasecmp(optval, "force-encode") == 0) + opt->manifest = MANIFEST_OPTION_FORCE_ENCODE; + else + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized manifest option: \"%s\"", + optval))); + o_manifest = true; + } + else if (strcmp(defel->defname, "manifest_checksums") == 0) + { + char *optval = strVal(defel->arg); + + if (o_manifest_checksums) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("duplicate option \"%s\"", defel->defname))); + if (!pg_checksum_parse_type(optval, + &opt->manifest_checksum_type)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("unrecognized checksum algorithm: \"%s\"", + optval))); + o_manifest_checksums = true; + } else elog(ERROR, "option \"%s\" not recognized", defel->defname); } if (opt->label == NULL) opt->label = "base backup"; + if (opt->manifest == MANIFEST_OPTION_NO) + { + if (o_manifest_checksums) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("manifest checksums require a backup manifest"))); + opt->manifest_checksum_type = CHECKSUM_TYPE_NONE; + } } @@ -918,6 +1037,249 @@ SendBackupHeader(List *tablespaces) pq_puttextmessage('C', "SELECT"); } +/* + * Initialize state so that we can construct a backup manifest. + * + * NB: Although the checksum type for the data files is configurable, the + * checksum for the manifest itself always uses SHA-256. See comments in + * SendBackupManifest. 
+ */ +static void +InitializeManifest(manifest_info *manifest, basebackup_options *opt) +{ + if (opt->manifest == MANIFEST_OPTION_NO) + manifest->buffile = NULL; + else + manifest->buffile = BufFileCreateTemp(false); + manifest->checksum_type = opt->manifest_checksum_type; + pg_sha256_init(&manifest->manifest_ctx); + manifest->manifest_size = UINT64CONST(0); + manifest->force_encode = (opt->manifest == MANIFEST_OPTION_FORCE_ENCODE); + manifest->first_file = true; + manifest->still_checksumming = true; + + if (opt->manifest != MANIFEST_OPTION_NO) + AppendToManifest(manifest, + "{ \"PostgreSQL-Backup-Manifest-Version\": 1,\n" + "\"Files\": ["); +} + +/* + * Append a cstring to the manifest. + */ +static void +AppendStringToManifest(manifest_info *manifest, char *s) +{ + int len = strlen(s); + size_t written; + + Assert(manifest != NULL); + if (manifest->still_checksumming) + pg_sha256_update(&manifest->manifest_ctx, (uint8 *) s, len); + written = BufFileWrite(manifest->buffile, s, len); + if (written != len) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to temporary file: %m"))); + manifest->manifest_size += len; +} + +/* + * Add an entry to the backup manifest for a file. + */ +static void +AddFileToManifest(manifest_info *manifest, const char *spcoid, + const char *pathname, size_t size, time_t mtime, + pg_checksum_context *checksum_ctx) +{ + char pathbuf[MAXPGPATH]; + int pathlen; + StringInfoData buf; + + /* + * If there is no buffile, then the user doesn't want a manifest, so + * don't waste any time generating one. + */ + if (manifest->buffile == NULL) + return; + + /* + * If this file is part of a tablespace, the pathname passed to this + * function will be relative to the tar file that contains it. We want the + * pathname relative to the data directory (ignoring the intermediate + * symlink traversal). 
+ */ + if (spcoid != NULL) + { + snprintf(pathbuf, sizeof(pathbuf), "pg_tblspc/%s/%s", spcoid, + pathname); + pathname = pathbuf; + } + + /* + * Each file's entry need to be separated from any entry that follows + * by a comma, but there's no comma before the first one or after the + * last one. To make that work, adding a file to the manifest starts + * by terminating the most recently added line, with a comma if + * appropriate, but does not terminate the line inserted for this file. + */ + initStringInfo(&buf); + if (manifest->first_file) + { + appendStringInfoString(&buf, "\n"); + manifest->first_file = false; + } + else + appendStringInfoString(&buf, ",\n"); + + /* + * Write the relative pathname to this file out to the manifest. The + * manifest is always stored in UTF-8, so we have to encode paths that + * are not valid in that encoding. + */ + pathlen = strlen(pathname); + if (!manifest->force_encode && + pg_verify_mbstr(PG_UTF8, pathname, pathlen, true)) + { + appendStringInfoString(&buf, "{ \"Path\": "); + escape_json(&buf, pathname); + appendStringInfoString(&buf, ", "); + } + else + { + appendStringInfoString(&buf, "{ \"Encoded-Path\": \""); + enlargeStringInfo(&buf, 2 * pathlen); + buf.len += hex_encode((char *) pathname, pathlen, + &buf.data[buf.len]); + appendStringInfoString(&buf, "\", "); + } + + appendStringInfo(&buf, "\"Size\": %zu, ", size); + + /* + * Convert last modification time to a string and append it to the + * manifest. Since it's not clear what time zone to use and since time + * zone definitions can change, possibly causing confusion, use GMT always. + */ + appendStringInfoString(&buf, "\"Last-Modified\": \""); + enlargeStringInfo(&buf, 128); + buf.len += pg_strftime(&buf.data[buf.len], 128, "%Y-%m-%d %H:%M:%S %Z", + pg_gmtime(&mtime)); + appendStringInfoString(&buf, "\""); + + /* Add checksum information. 
*/ + if (checksum_ctx->type != CHECKSUM_TYPE_NONE) + { + uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH]; + int checksumlen; + + checksumlen = pg_checksum_final(checksum_ctx, checksumbuf); + + appendStringInfo(&buf, + ", \"Checksum-Algorithm\": \"%s\", \"Checksum\": \"", + pg_checksum_type_name(checksum_ctx->type)); + enlargeStringInfo(&buf, 2 * checksumlen); + buf.len += hex_encode((char *) checksumbuf, checksumlen, + &buf.data[buf.len]); + appendStringInfoString(&buf, "\""); + } + + /* Close out the object. */ + appendStringInfoString(&buf, " }"); + + /* OK, add it to the manifest. */ + AppendStringToManifest(manifest, buf.data); + + /* Avoid leaking memory. */ + pfree(buf.data); +} + +/* + * Finalize the backup manifest, and send it to the client. + */ +static void +SendBackupManifest(manifest_info *manifest) +{ + StringInfoData protobuf; + uint8 checksumbuf[PG_SHA256_DIGEST_LENGTH]; + char checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH]; + size_t manifest_bytes_done = 0; + + /* + * If there is no buffile, then the user doesn't want a manifest, so + * don't waste any time generating one. + */ + if (manifest->buffile == NULL) + return; + + /* Terminate the list of files. */ + AppendStringToManifest(manifest, "],\n"); + + /* + * Append manifest checksum, so that the problems with the manifest itself + * can be detected. + * + * We always use SHA-256 for this, regardless of what algorithm is chosen + * for checksumming the files. If we ever want to make the checksum + * algorithm used for the manifest file variable, the client will need a + * way to figure out which algorithm to use as close to the beginning of + * the manifest file as possible, to avoid having to read the whole thing + * twice. 
+ */ + manifest->still_checksumming = false; + pg_sha256_final(&manifest->manifest_ctx, checksumbuf); + AppendStringToManifest(manifest, "\"Manifest-Checksum\": \""); + hex_encode((char *) checksumbuf, sizeof checksumbuf, checksumstringbuf); + checksumstringbuf[PG_SHA256_DIGEST_STRING_LENGTH - 1] = '\0'; + AppendStringToManifest(manifest, checksumstringbuf); + AppendStringToManifest(manifest, "\"}\n"); + + /* + * We've written all the data to the manifest file. Rewind the file so + * that we can read it all back. + */ + if (BufFileSeek(manifest->buffile, 0, 0L, SEEK_SET)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not rewind temporary file: %m"))); + + /* Send CopyOutResponse message */ + pq_beginmessage(&protobuf, 'H'); + pq_sendbyte(&protobuf, 0); /* overall format */ + pq_sendint16(&protobuf, 0); /* natts */ + pq_endmessage(&protobuf); + + /* + * Send CopyData messages. + * + * We choose to read back the data from the temporary file in chunks of + * size BLCKSZ; this isn't necessary, but buffile.c uses that as the I/O + * size, so it seems to make sense to match that value here. 
+ */ + while (manifest_bytes_done < manifest->manifest_size) + { + char manifestbuf[BLCKSZ]; + size_t bytes_to_read; + size_t rc; + + bytes_to_read = Min(sizeof(manifestbuf), + manifest->manifest_size - manifest_bytes_done); + rc = BufFileRead(manifest->buffile, manifestbuf, bytes_to_read); + if (rc != bytes_to_read) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read from temporary file: %m"))); + pq_putmessage('d', manifestbuf, bytes_to_read); + manifest_bytes_done += bytes_to_read; + } + + /* No more data, so send CopyDone message */ + pq_putemptymessage('c'); + + /* Release resources */ + BufFileClose(manifest->buffile); +} + /* * Send a single resultset containing just a single * XLogRecPtr record (in text format) @@ -978,11 +1340,15 @@ SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli) * Inject a file with given name and content in the output tar stream. */ static void -sendFileWithContent(const char *filename, const char *content) +sendFileWithContent(const char *filename, const char *content, + manifest_info *manifest) { struct stat statbuf; int pad, len; + pg_checksum_context checksum_ctx; + + pg_checksum_init(&checksum_ctx, manifest->checksum_type); len = strlen(content); @@ -1017,6 +1383,10 @@ sendFileWithContent(const char *filename, const char *content) pq_putmessage('d', buf, pad); update_basebackup_progress(pad); } + + pg_checksum_update(&checksum_ctx, (uint8 *) content, len); + AddFileToManifest(manifest, NULL, filename, len, statbuf.st_mtime, + &checksum_ctx); } /* @@ -1027,7 +1397,8 @@ sendFileWithContent(const char *filename, const char *content) * Only used to send auxiliary tablespaces, not PGDATA. 
*/ int64 -sendTablespace(char *path, bool sizeonly) +sendTablespace(char *path, char *spcoid, bool sizeonly, + manifest_info *manifest) { int64 size; char pathbuf[MAXPGPATH]; @@ -1060,7 +1431,8 @@ sendTablespace(char *path, bool sizeonly) sizeonly); /* Send all the files in the tablespace version directory */ - size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true); + size += sendDir(pathbuf, strlen(path), sizeonly, NIL, true, manifest, + spcoid); return size; } @@ -1079,7 +1451,7 @@ sendTablespace(char *path, bool sizeonly) */ static int64 sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, - bool sendtblspclinks) + bool sendtblspclinks, manifest_info *manifest, const char *spcoid) { DIR *dir; struct dirent *de; @@ -1359,7 +1731,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, skip_this_dir = true; if (!skip_this_dir) - size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, sendtblspclinks); + size += sendDir(pathbuf, basepathlen, sizeonly, tablespaces, + sendtblspclinks, manifest, spcoid); } else if (S_ISREG(statbuf.st_mode)) { @@ -1367,7 +1740,8 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, if (!sizeonly) sent = sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf, - true, isDbDir ? atooid(lastDir + 1) : InvalidOid); + true, isDbDir ? atooid(lastDir + 1) : InvalidOid, + manifest, spcoid); if (sent || sizeonly) { @@ -1437,8 +1811,9 @@ is_checksummed_file(const char *fullpath, const char *filename) * and the file did not exist. 
*/ static bool -sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf, - bool missing_ok, Oid dboid) +sendFile(const char *readfilename, const char *tarfilename, + struct stat *statbuf, bool missing_ok, Oid dboid, + manifest_info *manifest, const char *spcoid) { FILE *fp; BlockNumber blkno = 0; @@ -1455,6 +1830,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf int segmentno = 0; char *segmentpath; bool verify_checksum = false; + pg_checksum_context checksum_ctx; + + pg_checksum_init(&checksum_ctx, manifest->checksum_type); fp = AllocateFile(readfilename, "rb"); if (fp == NULL) @@ -1625,6 +2003,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf (errmsg("base backup could not send data, aborting backup"))); update_basebackup_progress(cnt); + /* Also feed it to the checksum machinery. */ + pg_checksum_update(&checksum_ctx, (uint8 *) buf, cnt); + len += cnt; throttle(cnt); @@ -1649,6 +2030,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf { cnt = Min(sizeof(buf), statbuf->st_size - len); pq_putmessage('d', buf, cnt); + pg_checksum_update(&checksum_ctx, (uint8 *) buf, cnt); update_basebackup_progress(cnt); len += cnt; throttle(cnt); @@ -1657,7 +2039,8 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf /* * Pad to 512 byte boundary, per tar format requirements. (This small - * piece of data is probably not worth throttling.) + * piece of data is probably not worth throttling, and is not checksummed + * because it's not actually part of the file.) 
*/ pad = ((len + 511) & ~511) - len; if (pad > 0) @@ -1682,6 +2065,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf total_checksum_failures += checksum_failures; + AddFileToManifest(manifest, spcoid, tarfilename, statbuf->st_size, + statbuf->st_mtime, &checksum_ctx); + return true; } diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y index 14fcd53221..f93a0de218 100644 --- a/src/backend/replication/repl_gram.y +++ b/src/backend/replication/repl_gram.y @@ -87,6 +87,8 @@ static SQLCmd *make_sqlcmd(void); %token K_EXPORT_SNAPSHOT %token K_NOEXPORT_SNAPSHOT %token K_USE_SNAPSHOT +%token K_MANIFEST +%token K_MANIFEST_CHECKSUMS %type command %type base_backup start_replication start_logical_replication @@ -156,6 +158,7 @@ var_name: IDENT { $$ = $1; } /* * BASE_BACKUP [LABEL '