diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml
index af7c731..184564c 100644
--- a/doc/src/sgml/ref/pg_basebackup.sgml
+++ b/doc/src/sgml/ref/pg_basebackup.sgml
@@ -548,6 +548,18 @@ PostgreSQL documentation
+     <varlistentry>
+      <term><option>--verify-backup</option></term>
+      <listitem>
+       <para>
+        Validate the given backup directory and detect any modification
+        without restarting the server.  For a plain-format backup, provide
+        the backup directory path with the <option>-D</option> option.
+        Tar-format backups can be verified after untarring.
+       </para>
+      </listitem>
+     </varlistentry>
diff --git a/src/bin/pg_basebackup/Makefile b/src/bin/pg_basebackup/Makefile
index 4f27492..3cbd061 100644
--- a/src/bin/pg_basebackup/Makefile
+++ b/src/bin/pg_basebackup/Makefile
@@ -25,7 +25,8 @@ OBJS = \
 	$(WIN32RES) \
 	receivelog.o \
 	streamutil.o \
-	walmethods.o
+	walmethods.o \
+	backuphash.o
 
 all: pg_basebackup pg_receivewal pg_recvlogical
diff --git a/src/bin/pg_basebackup/backuphash.c b/src/bin/pg_basebackup/backuphash.c
new file mode 100644
index 0000000..c9a20eb
--- /dev/null
+++ b/src/bin/pg_basebackup/backuphash.c
@@ -0,0 +1,140 @@
+#include "postgres_fe.h"
+
+#include "backuphash.h"
+#include "common/logging.h"
+#include "libpq-fe.h"
+
+static uint32 string_hash_sdbm(const char *key);
+
+/*
+ * Create a new hash table sized for the given number of records.
+ *
+ * The bucket count is rounded up to a power of two so that
+ * "hash & (nbuckets - 1)" can be used as the bucket index.
+ */
+pgrhash *
+pgrhash_create(int count)
+{
+	unsigned	bucket_shift;
+	pgrhash    *ht;
+
+	bucket_shift = fls(count);
+	if (bucket_shift >= sizeof(unsigned) * BITS_PER_BYTE)
+	{
+		/* Shifting by the full word width would be undefined behavior. */
+		pg_log_error("too many tuples");
+		exit(1);
+	}
+
+	ht = (pgrhash *) pg_malloc(sizeof(pgrhash));
+	ht->nbuckets = ((unsigned) 1) << bucket_shift;
+	ht->bucket = (pgrhash_entry **)
+		pg_malloc0(ht->nbuckets * sizeof(pgrhash_entry *));
+
+	return ht;
+}
+
+/*
+ * Search a result-set hash table for a row matching a given filename.
+ * Returns the matching record, or NULL if none.
+ */
+DataDirectoryFileInfo *
+pgrhash_get(pgrhash *ht, char *filename)
+{
+	uint32		hashvalue;
+	pgrhash_entry *bucket;
+
+	hashvalue = string_hash_sdbm(filename);
+
+	for (bucket = ht->bucket[hashvalue & (ht->nbuckets - 1)];
+		 bucket != NULL; bucket = bucket->next)
+	{
+		if (strcmp(bucket->record->filename, filename) == 0)
+			return bucket->record;
+	}
+
+	return NULL;
+}
+
+/*
+ * Insert a row into a result-set hash table, provided no such row is
+ * already present.
+ *
+ * Returns -1 on success, or 0 if an entry with the same filename already
+ * exists (the table is left unchanged in that case).
+ */
+int
+pgrhash_insert(pgrhash *ht, DataDirectoryFileInfo *record)
+{
+	unsigned	bucket_number;
+	unsigned	hashvalue;
+	pgrhash_entry *bucket;
+	pgrhash_entry *entry;
+
+	hashvalue = string_hash_sdbm(record->filename);
+
+	/* Check for a conflicting entry already present in the table. */
+	bucket_number = hashvalue & (ht->nbuckets - 1);
+	for (bucket = ht->bucket[bucket_number];
+		 bucket != NULL; bucket = bucket->next)
+	{
+		if (strcmp(bucket->record->filename, record->filename) == 0)
+			return 0;
+	}
+
+	/* Insert the new entry at the head of its bucket chain. */
+	entry = pg_malloc(sizeof(pgrhash_entry));
+	entry->next = ht->bucket[bucket_number];
+	entry->record = record;
+	ht->bucket[bucket_number] = entry;
+
+	return -1;
+}
+
+/*
+ * Simple string hash function from http://www.cse.yorku.ca/~oz/hash.html
+ *
+ * The backend uses a more sophisticated function for hashing strings,
+ * but we don't really need that complexity here.  Most of the values
+ * that we're hashing are short path names, so there shouldn't be much
+ * room for pathological input.
+ */
+static uint32
+string_hash_sdbm(const char *key)
+{
+	uint32		hash = 0;
+	int			c;
+
+	while ((c = *key++))
+		hash = c + (hash << 6) + (hash << 16) - hash;
+
+	return hash;
+}
+
+/*
+ * Walk the whole hash table and return the list of filenames whose records
+ * were never marked "touch"ed during the data-directory scan, i.e. files
+ * listed in the manifest but missing from the backup directory.
+ */
+SimpleStringList *
+pgrhash_seq_search(pgrhash *ht)
+{
+	unsigned	i;
+	SimpleStringList *fileList;
+	pgrhash_entry *entry;
+
+	fileList = (SimpleStringList *) pg_malloc(sizeof(SimpleStringList));
+	fileList->head = NULL;
+	fileList->tail = NULL;
+
+	for (i = 0; i < ht->nbuckets; i++)
+	{
+		for (entry = ht->bucket[i]; entry != NULL; entry = entry->next)
+		{
+			if (!entry->record->touch)
+				simple_string_list_append(fileList, entry->record->filename);
+		}
+	}
+
+	return fileList;
+}
diff --git a/src/bin/pg_basebackup/backuphash.h b/src/bin/pg_basebackup/backuphash.h
new file mode 100644
index 0000000..eb71029
--- /dev/null
+++ b/src/bin/pg_basebackup/backuphash.h
@@ -0,0 +1,40 @@
+#ifndef BACKUPHASH_H
+#define BACKUPHASH_H
+
+#include "common/sha2.h"
+#include "fe_utils/simple_list.h"
+
+#define CHECKSUM_LENGTH 256
+
+/* One parsed "File" line of a backup_manifest. */
+typedef struct DataDirectoryFileInfo
+{
+	char		filetype[10];
+	char		filename[MAXPGPATH];
+	int			filesize;
+	char		mtime[24];
+	char		checksum[CHECKSUM_LENGTH];
+	bool		touch;			/* seen during data-directory scan? */
+} DataDirectoryFileInfo;
+
+typedef struct pgrhash_entry
+{
+	struct pgrhash_entry *next; /* link to next entry in same bucket */
+	DataDirectoryFileInfo *record;
+} pgrhash_entry;
+
+/* Chained-bucket hash table keyed by filename. */
+struct pgrhash
+{
+	unsigned	nbuckets;		/* number of buckets */
+	pgrhash_entry **bucket;		/* pointer to hash entries */
+};
+
+typedef struct pgrhash pgrhash;
+
+extern pgrhash *pgrhash_create(int count);
+extern DataDirectoryFileInfo *pgrhash_get(pgrhash *ht, char *filename);
+extern int	pgrhash_insert(pgrhash *ht, DataDirectoryFileInfo *record);
+extern SimpleStringList *pgrhash_seq_search(pgrhash *ht);
+
+#endif							/* BACKUPHASH_H */
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 7d5ed0d..17574c9 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -27,11 +27,15 @@
 #include "access/xlog_internal.h"
+#include "backuphash.h"
+#include "common/backup.h"
 #include "common/file_perm.h"
 #include "common/file_utils.h"
 #include "common/logging.h"
+#include "common/sha2.h"
 #include "common/string.h"
 #include "fe_utils/recovery_gen.h"
+#include "fe_utils/simple_list.h"
 #include "fe_utils/string_utils.h"
 #include "getopt_long.h"
 #include "libpq-fe.h"
@@ -44,6 +48,12 @@
 #define ERRCODE_DATA_CORRUPTED	"XX001"
 
+#define CHUNK_SIZE 1024			/* manifest records per allocation chunk */
+#define MAXCHUNKS 16			/* initial size of the chunk-pointer array */
+
+static const char hextbl[] = "0123456789abcdef";
+static unsigned hex_encode(const char *src, unsigned len, char *dst);
+
 typedef struct TablespaceListCell
 {
 	struct TablespaceListCell *next;
@@ -94,6 +104,15 @@ typedef struct WriteManifestState
 	FILE	   *file;
 } WriteManifestState;
 
+/* Chunked array of all manifest file records; grows by doubling. */
+typedef struct DataDirectoryAllFiles
+{
+	int			maxchunks;		/* allocated slots in files[] */
+	int			nchunks;		/* chunks actually allocated */
+	int			nfiles;			/* total records stored */
+	DataDirectoryFileInfo **files;
+} DataDirectoryAllFiles;
+
 typedef void (*WriteDataCallback) (size_t nbytes, char *buf,
 								   void *callback_data);
@@ -142,6 +160,8 @@ static bool create_slot = false;
 static bool no_slot = false;
 static bool verify_checksums = true;
 static char *manifest_checksums = NULL;
+static bool verify_backup = false;
+static enum ManifestCheckSum checksum_type = MC_NONE;
 
 static bool success = false;
 static bool made_new_pgdata = false;
@@ -201,6 +221,13 @@ static bool reached_end_position(XLogRecPtr segendpos, uint32 timeline,
 static const char *get_tablespace_mapping(const char *dir);
 static void tablespace_list_append(const char *arg);
 
+static void VerifyBackup(void);
+static pgrhash *create_manifest_hash(char manifest_path[MAXPGPATH]);
+static void scan_data_directory(char *basedir, const char *subdirpath,
+								pgrhash *ht);
+static void verify_file(struct dirent *de, char fn[MAXPGPATH],
+						struct stat st, char relative_path[MAXPGPATH],
+						pgrhash *ht);
 
 static void
 cleanup_directories_atexit(void)
@@ -401,6 +427,7 @@ usage(void)
 			 "                         do not verify checksums\n"));
 	printf(_("      --manifest-checksums=SHA256|CRC32C|NONE\n"
 			 "                         calculate checksums for manifest files using provided algorithm\n"));
+	printf(_("      --verify-backup    validate the backup\n"));
 	printf(_("  -?, --help             show this help, then exit\n"));
 	printf(_("\nConnection options:\n"));
 	printf(_("  -d, --dbname=CONNSTR   connection string\n"));
@@ -2167,6 +2194,7 @@ main(int argc, char **argv)
 		{"no-slot", no_argument, NULL, 2},
 		{"no-verify-checksums", no_argument, NULL, 3},
 		{"manifest-checksums", required_argument, NULL, 'm'},
+		{"verify-backup", no_argument, NULL, 4},
 		{NULL, 0, NULL, 0}
 	};
 	int			c;
@@ -2338,6 +2366,9 @@ main(int argc, char **argv)
 			case 'm':
 				manifest_checksums = pg_strdup(optarg);
 				break;
+			case 4:
+				verify_backup = true;
+				break;
 			default:
 
 				/*
@@ -2460,6 +2491,12 @@ main(int argc, char **argv)
 	}
 #endif
 
+	if (verify_backup)
+	{
+		VerifyBackup();
+		return 0;
+	}
+
 	/* connection in replication mode to server */
 	conn = GetConnection();
 	if (!conn)
@@ -2524,3 +2561,420 @@ main(int argc, char **argv)
 	success = true;
 	return 0;
 }
+
+/*
+ * Read the backup_manifest file and build a hash table from it, then scan
+ * the data directory and verify each file.  Finally, walk the hash table
+ * sequentially to report files that are listed in the manifest but are
+ * missing from the backup directory.
+ */
+static void
+VerifyBackup(void)
+{
+	char		manifest_path[MAXPGPATH];
+	SimpleStringList *filenames;
+	SimpleStringListCell *cell;
+	pgrhash    *ht;
+
+	snprintf(manifest_path, sizeof(manifest_path), "%s/%s", basedir,
+			 "backup_manifest");
+
+	/* build hash table */
+	ht = create_manifest_hash(manifest_path);
+
+	scan_data_directory(basedir, NULL, ht);
+
+	/* sequential scan on hash table to find missing files */
+	filenames = pgrhash_seq_search(ht);
+
+	for (cell = filenames->head; cell; cell = cell->next)
+		pg_log_info("missing file: %s", cell->val);
+}
+
+/*
+ * Given the manifest path, read the file and build a hash table of its
+ * records.  Also recompute the manifest checksum over the records read and
+ * compare it with the checksum recorded in the backup_manifest file itself;
+ * if they differ, the manifest has been tampered with, so error out.
+ */
+static pgrhash *
+create_manifest_hash(char manifest_path[MAXPGPATH])
+{
+	FILE	   *file;
+	DataDirectoryAllFiles *allfiles;
+	int			idx;
+	DataDirectoryFileInfo *record;
+	pgrhash    *ht;
+	PQExpBuffer manifest;
+	char		file_checksum[256];
+	bool		found_manifest_checksum = false;
+	char		header[1024];
+	long		pos;
+
+	manifest = createPQExpBuffer();
+	if (!manifest)
+	{
+		pg_log_error("out of memory");
+		exit(1);
+	}
+
+	file = fopen(manifest_path, "r");
+	if (!file)
+	{
+		pg_log_error("could not open backup_manifest");
+		exit(1);
+	}
+
+	/* read the file header */
+	if (fscanf(file, "%1023[^\n]\n", header) != 1)
+	{
+		pg_log_error("error while reading the header from backup_manifest");
+		exit(1);
+	}
+
+	appendPQExpBufferStr(manifest, header);
+	appendPQExpBufferStr(manifest, "\n");
+
+	allfiles = (DataDirectoryAllFiles *) pg_malloc(sizeof(DataDirectoryAllFiles));
+	allfiles->maxchunks = MAXCHUNKS;
+	allfiles->nchunks = 0;
+	allfiles->nfiles = 0;
+	allfiles->files = (DataDirectoryFileInfo **)
+		pg_malloc(sizeof(DataDirectoryFileInfo *) * MAXCHUNKS);
+
+	while (!feof(file))
+	{
+		char		checksum_with_type[256];
+
+		if (allfiles->nfiles % CHUNK_SIZE == 0)
+		{
+			/* Grow the chunk-pointer array before writing past its end. */
+			if (allfiles->nchunks >= allfiles->maxchunks)
+			{
+				allfiles->maxchunks *= 2;
+				allfiles->files = (DataDirectoryFileInfo **)
+					pg_realloc(allfiles->files, allfiles->maxchunks *
+							   sizeof(DataDirectoryFileInfo *));
+			}
+			allfiles->files[allfiles->nchunks] = (DataDirectoryFileInfo *)
+				pg_malloc(CHUNK_SIZE * sizeof(DataDirectoryFileInfo));
+
+			allfiles->nchunks++;
+		}
+
+		record = &allfiles->files[allfiles->nfiles / CHUNK_SIZE][allfiles->nfiles % CHUNK_SIZE];
+
+		/* Remember where this line starts, in case it must be re-parsed. */
+		pos = ftell(file);
+
+		/* Field widths bound each conversion to its destination buffer. */
+		if (fscanf(file, "%9s %1023s %d %23[^\t] %255s\n", record->filetype,
+				   record->filename, &record->filesize,
+				   record->mtime, record->checksum) != 5)
+		{
+			/*
+			 * On failure, re-read the line and check whether it is the
+			 * trailer line carrying the manifest checksum.  The trailer is
+			 * always the last line, so stop reading afterwards.
+			 */
+			if (fseek(file, pos, SEEK_SET) == -1 ||
+				fscanf(file, "Manifest-Checksum\t%255s\n", file_checksum) != 1)
+			{
+				pg_log_error("error while reading the backup_manifest file");
+				exit(1);
+			}
+
+			found_manifest_checksum = true;
+			break;
+		}
+
+		record->touch = false;
+
+		/* Keep the original "TYPE:checksum" form for re-building the manifest. */
+		snprintf(checksum_with_type, sizeof(checksum_with_type), "%s",
+				 record->checksum);
+
+		if (strcmp(record->checksum, "-") == 0)
+			checksum_type = MC_NONE;
+		else if (strncmp(record->checksum, "SHA256", 6) == 0)
+		{
+			checksum_type = MC_SHA256;
+			/* Strip the "SHA256:" prefix in place; memmove allows overlap. */
+			memmove(record->checksum,
+					record->checksum + strlen("SHA256:"),
+					strlen(record->checksum + strlen("SHA256:")) + 1);
+		}
+		else if (strncmp(record->checksum, "CRC32C", 6) == 0)
+		{
+			checksum_type = MC_CRC32C;
+			memmove(record->checksum,
+					record->checksum + strlen("CRC32C:"),
+					strlen(record->checksum + strlen("CRC32C:")) + 1);
+		}
+		else
+		{
+			pg_log_error("unknown checksum method");
+			exit(1);
+		}
+
+		appendPQExpBuffer(manifest, "File\t%s\t%d\t%s\t%s\n", record->filename,
+						  record->filesize, record->mtime, checksum_with_type);
+
+		allfiles->nfiles++;
+	}
+
+	fclose(file);
+
+	/*
+	 * Once all records have been read, recompute the manifest checksum and
+	 * compare it with the checksum written in the manifest file.
+	 */
+	if (checksum_type != MC_NONE)
+	{
+		char		checksumbuf[CHECKSUM_LENGTH];
+		int			checksumbuflen;
+		ChecksumCtx cCtx;
+		char		encoded_checksum[CHECKSUM_LENGTH];
+		char		manifest_checksum[CHECKSUM_LENGTH];
+
+		if (!found_manifest_checksum)
+		{
+			pg_log_error("backup_manifest has no Manifest-Checksum line");
+			exit(1);
+		}
+
+		initialize_manifest_checksum(&cCtx, checksum_type);
+		update_manifest_checksum(&cCtx, checksum_type, manifest->data,
+								 manifest->len);
+		checksumbuflen = finalize_manifest_checksum(&cCtx, checksum_type,
+													checksumbuf);
+
+		/* Strip the algorithm prefix from the recorded checksum. */
+		switch (checksum_type)
+		{
+			case MC_SHA256:
+				snprintf(manifest_checksum, CHECKSUM_LENGTH, "%s",
+						 &file_checksum[strlen("SHA256:")]);
+				break;
+			case MC_CRC32C:
+				snprintf(manifest_checksum, CHECKSUM_LENGTH, "%s",
+						 &file_checksum[strlen("CRC32C:")]);
+				break;
+			case MC_NONE:
+				break;
+		}
+
+		checksumbuflen = hex_encode(checksumbuf, checksumbuflen,
+									encoded_checksum);
+		encoded_checksum[checksumbuflen] = '\0';
+
+		/*
+		 * If the checksums differ, the backup_manifest file was modified.
+		 * Throw an error and abort.
+		 */
+		if (strcmp(encoded_checksum, manifest_checksum) != 0)
+		{
+			pg_log_error("backup manifest checksum difference. Aborting");
+			exit(1);
+		}
+	}
+
+	destroyPQExpBuffer(manifest);
+
+	ht = pgrhash_create(allfiles->nfiles);
+
+	for (idx = 0; idx < allfiles->nfiles; idx++)
+	{
+		record = &allfiles->files[idx / CHUNK_SIZE][idx % CHUNK_SIZE];
+		if (pgrhash_insert(ht, record) != -1)
+			pg_log_info("duplicate file present: %s", record->filename);
+	}
+
+	return ht;
+}
+
+/*
+ * Recursively scan the data directory and check every regular file against
+ * the hash table built from the manifest (size and, if enabled, checksum).
+ * pg_wal is skipped: its contents are not covered by the manifest.
+ */
+static void
+scan_data_directory(char *basedir, const char *subdirpath, pgrhash *ht)
+{
+	char		path[MAXPGPATH];
+	char		relative_path[MAXPGPATH] = "";
+	DIR		   *dir;
+	struct dirent *de;
+
+	if (subdirpath)
+	{
+		snprintf(path, sizeof(path), "%s/%s", basedir, subdirpath);
+		snprintf(relative_path, sizeof(relative_path), "%s/", subdirpath);
+	}
+	else
+		snprintf(path, sizeof(path), "%s", basedir);
+
+	dir = opendir(path);
+	if (!dir)
+	{
+		pg_log_error("could not open directory \"%s\": %m", path);
+		exit(1);
+	}
+
+	while ((de = readdir(dir)) != NULL)
+	{
+		char		fn[MAXPGPATH];
+		struct stat st;
+
+		if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0 ||
+			strcmp(de->d_name, "pg_wal") == 0)
+			continue;
+
+		snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name);
+		if (stat(fn, &st) < 0)
+		{
+			pg_log_error("could not stat file \"%s\": %m", fn);
+			exit(1);
+		}
+		if (S_ISREG(st.st_mode))
+			verify_file(de, fn, st, relative_path, ht);
+		else if (S_ISDIR(st.st_mode))
+		{
+			char		newsubdirpath[MAXPGPATH];
+
+			if (subdirpath)
+				snprintf(newsubdirpath, MAXPGPATH, "%s/%s", subdirpath,
+						 de->d_name);
+			else
+				snprintf(newsubdirpath, MAXPGPATH, "%s", de->d_name);
+
+			scan_data_directory(basedir, newsubdirpath, ht);
+		}
+	}
+	closedir(dir);
+}
+
+/*
+ * Given a file and its stat data, look it up in the hash table and, if
+ * found, compare its details against the manifest record.
+ */
+static void
+verify_file(struct dirent *de, char fn[MAXPGPATH],
+			struct stat st, char relative_path[MAXPGPATH],
+			pgrhash *ht)
+{
+	PQExpBuffer filename;
+	DataDirectoryFileInfo *record;
+
+	/* Skip backup manifest file. */
+	if (strcmp(de->d_name, "backup_manifest") == 0)
+		return;
+
+	filename = createPQExpBuffer();
+	if (!filename)
+	{
+		pg_log_error("out of memory");
+		exit(1);
+	}
+
+	appendPQExpBuffer(filename, "%s%s", relative_path, de->d_name);
+
+	/*
+	 * Look the file up in the hash table; if a record is found, compare the
+	 * file size and checksum (if enabled).  Modification time cannot be
+	 * compared: the manifest records the mtime of the server's file, while
+	 * the same file in the backup carries the mtime of the backup itself.
+	 */
+	record = pgrhash_get(ht, filename->data);
+	if (record)
+	{
+		record->touch = true;
+		if (record->filesize != st.st_size)
+			pg_log_info("size changed for file: %s, original size: %d, current size: %lld",
+						filename->data, record->filesize,
+						(long long) st.st_size);
+
+		/*
+		 * Read the file and generate its checksum with the configured
+		 * method, then compare it with the checksum in the hash entry.
+		 */
+		if (checksum_type != MC_NONE)
+		{
+			FILE	   *fp;
+			/* 1MB read chunk; static to keep it off the stack */
+			static char buf[1048576];
+			pgoff_t		len = 0;
+			off_t		cnt;
+			char		checksumbuf[CHECKSUM_LENGTH];
+			char		encode_checksumbuf[CHECKSUM_LENGTH];
+			int			checksumbuflen;
+			ChecksumCtx cCtx;
+
+			initialize_manifest_checksum(&cCtx, checksum_type);
+
+			/* Binary mode: required for correct checksums on Windows. */
+			fp = fopen(fn, "rb");
+			if (!fp)
+			{
+				pg_log_error("could not open file \"%s\": %m", de->d_name);
+				exit(1);
+			}
+
+			/* Read file in chunks [1 MB each chunk] */
+			while ((cnt = fread(buf, 1,
+								Min(sizeof(buf), (size_t) (st.st_size - len)),
+								fp)) > 0)
+			{
+				update_manifest_checksum(&cCtx, checksum_type, buf, cnt);
+				len += cnt;
+			}
+
+			checksumbuflen = finalize_manifest_checksum(&cCtx, checksum_type,
+														checksumbuf);
+
+			/* Convert checksum to hexadecimal. */
+			checksumbuflen = hex_encode(checksumbuf, checksumbuflen,
+										encode_checksumbuf);
+			encode_checksumbuf[checksumbuflen] = '\0';
+
+			fclose(fp);
+
+			if (strcmp(record->checksum, encode_checksumbuf) != 0)
+				pg_log_info("checksum difference for file: %s",
+							filename->data);
+		}
+	}
+	else
+		pg_log_info("extra file found: %s", filename->data);
+
+	destroyPQExpBuffer(filename);
+}
+
+/*
+ * Hex-encode len bytes from src into dst.  Returns the number of characters
+ * written; no NUL terminator is added.
+ */
+static unsigned
+hex_encode(const char *src, unsigned len, char *dst)
+{
+	const char *end = src + len;
+
+	while (src < end)
+	{
+		unsigned char c = (unsigned char) *src;
+
+		*dst++ = hextbl[c >> 4];
+		*dst++ = hextbl[c & 0xF];
+		src++;
+	}
+	return len * 2;
+}