From 0176762890bc8ddc02e26696ba4414c6c8bac2e5 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Thu, 31 Aug 2023 07:59:52 -0700 Subject: [PATCH v9 3/4] add support for syncfs in frontend support functions --- src/bin/initdb/initdb.c | 5 +- src/bin/pg_basebackup/pg_basebackup.c | 5 +- src/bin/pg_checksums/pg_checksums.c | 3 +- src/bin/pg_dump/pg_backup.h | 4 +- src/bin/pg_dump/pg_backup_archiver.c | 14 ++- src/bin/pg_dump/pg_backup_archiver.h | 1 + src/bin/pg_dump/pg_backup_directory.c | 2 +- src/bin/pg_dump/pg_dump.c | 3 +- src/bin/pg_rewind/file_ops.c | 2 +- src/bin/pg_rewind/pg_rewind.c | 1 + src/bin/pg_rewind/pg_rewind.h | 2 + src/common/file_utils.c | 170 +++++++++++++++++++++----- src/include/common/file_utils.h | 5 +- 13 files changed, 172 insertions(+), 45 deletions(-) diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 905b979947..bbea1e412b 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -165,6 +165,7 @@ static bool show_setting = false; static bool data_checksums = false; static char *xlog_dir = NULL; static int wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024); +static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC; /* internal vars */ @@ -3333,7 +3334,7 @@ main(int argc, char *argv[]) fputs(_("syncing data to disk ... "), stdout); fflush(stdout); - fsync_pgdata(pg_data, PG_VERSION_NUM); + fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method); check_ok(); return 0; } @@ -3396,7 +3397,7 @@ main(int argc, char *argv[]) { fputs(_("syncing data to disk ... "), stdout); fflush(stdout); - fsync_pgdata(pg_data, PG_VERSION_NUM); + fsync_pgdata(pg_data, PG_VERSION_NUM, sync_method); check_ok(); } else diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 1dc8efe0cb..1a6eacf6d5 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -148,6 +148,7 @@ static bool verify_checksums = true; static bool manifest = true; static bool manifest_force_encode = false; static char *manifest_checksums = NULL; +static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC; static bool success = false; static bool made_new_pgdata = false; @@ -2199,11 +2200,11 @@ BaseBackup(char *compression_algorithm, char *compression_detail, if (format == 't') { if (strcmp(basedir, "-") != 0) - (void) fsync_dir_recurse(basedir); + (void) fsync_dir_recurse(basedir, sync_method); } else { - (void) fsync_pgdata(basedir, serverVersion); + (void) fsync_pgdata(basedir, serverVersion, sync_method); } } diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c index 9011a19b4e..123450f483 100644 --- a/src/bin/pg_checksums/pg_checksums.c +++ b/src/bin/pg_checksums/pg_checksums.c @@ -44,6 +44,7 @@ static char *only_filenode = NULL; static bool do_sync = true; static bool verbose = false; static bool showprogress = false; +static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC; typedef enum { @@ -623,7 +624,7 @@ main(int argc, char *argv[]) if (do_sync) { pg_log_info("syncing data directory"); - fsync_pgdata(DataDir, PG_VERSION_NUM); + fsync_pgdata(DataDir, PG_VERSION_NUM, sync_method); } pg_log_info("updating control file"); diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h index aba780ef4b..3a57cdd97d 100644 --- a/src/bin/pg_dump/pg_backup.h +++ b/src/bin/pg_dump/pg_backup.h @@ -24,6 +24,7 @@ #define PG_BACKUP_H #include "common/compression.h" +#include "common/file_utils.h" #include "fe_utils/simple_list.h" #include "libpq-fe.h" @@ -307,7 +308,8 @@ extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt); extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt, const pg_compress_specification compression_spec, bool dosync, ArchiveMode mode, - SetupWorkerPtrType setupDumpWorker); + SetupWorkerPtrType setupDumpWorker, + DataDirSyncMethod sync_method); /* The --list option */ extern void PrintTOCSummary(Archive *AHX); diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index 39ebcfec32..4d83381d84 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -66,7 +66,8 @@ typedef struct _parallelReadyList static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt, const pg_compress_specification compression_spec, bool dosync, ArchiveMode mode, - SetupWorkerPtrType setupWorkerPtr); + SetupWorkerPtrType setupWorkerPtr, + DataDirSyncMethod sync_method); static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te); static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData); static char *sanitize_line(const char *str, bool want_hyphen); @@ -238,11 +239,12 @@ Archive * CreateArchive(const char *FileSpec, const ArchiveFormat fmt, const pg_compress_specification compression_spec, bool dosync, ArchiveMode mode, - SetupWorkerPtrType setupDumpWorker) + SetupWorkerPtrType setupDumpWorker, + DataDirSyncMethod sync_method) { ArchiveHandle *AH = _allocAH(FileSpec, fmt, compression_spec, - dosync, mode, setupDumpWorker); + dosync, mode, setupDumpWorker, sync_method); return (Archive *) AH; } @@ -257,7 +259,8 @@ OpenArchive(const char *FileSpec, const ArchiveFormat fmt) compression_spec.algorithm = PG_COMPRESSION_NONE; AH = _allocAH(FileSpec, fmt, compression_spec, true, - archModeRead, setupRestoreWorker); + archModeRead, setupRestoreWorker, + DATA_DIR_SYNC_METHOD_FSYNC); return (Archive *) AH; } @@ -2233,7 +2236,7 @@ static ArchiveHandle * _allocAH(const char *FileSpec, const ArchiveFormat fmt, const pg_compress_specification compression_spec, bool dosync, ArchiveMode mode, - SetupWorkerPtrType setupWorkerPtr) + SetupWorkerPtrType setupWorkerPtr, DataDirSyncMethod sync_method) { ArchiveHandle *AH; CompressFileHandle *CFH; @@ -2287,6 +2290,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt, AH->mode = mode; AH->compression_spec = compression_spec; AH->dosync = dosync; + AH->sync_method = sync_method; memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse)); diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h index 18b38c17ab..b07673933d 100644 --- a/src/bin/pg_dump/pg_backup_archiver.h +++ b/src/bin/pg_dump/pg_backup_archiver.h @@ -312,6 +312,7 @@ struct _archiveHandle pg_compress_specification compression_spec; /* Requested specification for * compression */ bool dosync; /* data requested to be synced on sight */ + DataDirSyncMethod sync_method; ArchiveMode mode; /* File mode - r or w */ void *formatData; /* Header data specific to file format */ diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c index 7f2ac7c7fd..6faa3a511f 100644 --- a/src/bin/pg_dump/pg_backup_directory.c +++ b/src/bin/pg_dump/pg_backup_directory.c @@ -613,7 +613,7 @@ _CloseArchive(ArchiveHandle *AH) * individually. Just recurse once through all the files generated. */ if (AH->dosync) - fsync_dir_recurse(ctx->directory); + fsync_dir_recurse(ctx->directory, AH->sync_method); } AH->FH = NULL; } diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 65f64c282d..39a468b131 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -357,6 +357,7 @@ main(int argc, char **argv) char *compression_algorithm_str = "none"; char *error_detail = NULL; bool user_compression_defined = false; + DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC; static DumpOptions dopt; @@ -777,7 +778,7 @@ main(int argc, char **argv) /* Open the output file */ fout = CreateArchive(filename, archiveFormat, compression_spec, - dosync, archiveMode, setupDumpWorker); + dosync, archiveMode, setupDumpWorker, sync_method); /* Make dump options accessible right away */ SetArchiveOptions(fout, &dopt, NULL); diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c index 25996b4da4..451fb1856e 100644 --- a/src/bin/pg_rewind/file_ops.c +++ b/src/bin/pg_rewind/file_ops.c @@ -296,7 +296,7 @@ sync_target_dir(void) if (!do_sync || dry_run) return; - fsync_pgdata(datadir_target, PG_VERSION_NUM); + fsync_pgdata(datadir_target, PG_VERSION_NUM, sync_method); } diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 7f69f02441..bdfacf3263 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -74,6 +74,7 @@ bool showprogress = false; bool dry_run = false; bool do_sync = true; bool restore_wal = false; +DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC; /* Target history */ TimeLineHistoryEntry *targetHistory; diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h index ef8bdc1fbb..05729adfef 100644 --- a/src/bin/pg_rewind/pg_rewind.h +++ b/src/bin/pg_rewind/pg_rewind.h @@ -13,6 +13,7 @@ #include "access/timeline.h" #include "common/logging.h" +#include "common/file_utils.h" #include "datapagemap.h" #include "libpq-fe.h" #include "storage/block.h" @@ -24,6 +25,7 @@ extern bool showprogress; extern bool dry_run; extern bool do_sync; extern int WalSegSz; +extern DataDirSyncMethod sync_method; /* Target history */ extern TimeLineHistoryEntry *targetHistory; diff --git a/src/common/file_utils.c b/src/common/file_utils.c index 74833c4acb..05c73c0bb7 100644 --- a/src/common/file_utils.c +++ b/src/common/file_utils.c @@ -51,6 +51,31 @@ static void walkdir(const char *path, int (*action) (const char *fname, bool isdir), bool process_symlinks); +#ifdef HAVE_SYNCFS +static void +do_syncfs(const char *path) +{ + int fd; + + fd = open(path, O_RDONLY, 0); + + if (fd < 0) + { + pg_log_error("could not open file \"%s\": %m", path); + return; + } + + if (syncfs(fd) < 0) + { + pg_log_error("could not synchronize file system for file \"%s\": %m", path); + (void) close(fd); + exit(EXIT_FAILURE); + } + + (void) close(fd); +} +#endif + /* * Issue fsync recursively on PGDATA and all its contents. * @@ -63,7 +88,8 @@ static void walkdir(const char *path, */ void fsync_pgdata(const char *pg_data, - int serverVersion) + int serverVersion, + DataDirSyncMethod sync_method) { bool xlog_is_symlink; char pg_wal[MAXPGPATH]; @@ -89,30 +115,93 @@ fsync_pgdata(const char *pg_data, xlog_is_symlink = true; } - /* - * If possible, hint to the kernel that we're soon going to fsync the data - * directory and its contents. - */ + switch (sync_method) + { + case DATA_DIR_SYNC_METHOD_SYNCFS: + { +#ifndef HAVE_SYNCFS + pg_log_error("this build does not support sync method \"%s\"", + "syncfs"); + exit(EXIT_FAILURE); +#else + DIR *dir; + struct dirent *de; + + /* + * On Linux, we don't have to open every single file one by + * one. We can use syncfs() to sync whole filesystems. We + * only expect filesystem boundaries to exist where we + * tolerate symlinks, namely pg_wal and the tablespaces, so we + * call syncfs() for each of those directories. + */ + + /* Sync the top level pgdata directory. */ + do_syncfs(pg_data); + + /* If any tablespaces are configured, sync each of those. */ + dir = opendir(pg_tblspc); + if (dir == NULL) + pg_log_error("could not open directory \"%s\": %m", + pg_tblspc); + else + { + while (errno = 0, (de = readdir(dir)) != NULL) + { + char subpath[MAXPGPATH * 2]; + + if (strcmp(de->d_name, ".") == 0 || + strcmp(de->d_name, "..") == 0) + continue; + + snprintf(subpath, sizeof(subpath), "%s/%s", + pg_tblspc, de->d_name); + do_syncfs(subpath); + } + + if (errno) + pg_log_error("could not read directory \"%s\": %m", + pg_tblspc); + + (void) closedir(dir); + } + + /* If pg_wal is a symlink, process that too. */ + if (xlog_is_symlink) + do_syncfs(pg_wal); +#endif /* HAVE_SYNCFS */ + } + break; + + case DATA_DIR_SYNC_METHOD_FSYNC: + { + /* + * If possible, hint to the kernel that we're soon going to + * fsync the data directory and its contents. + */ #ifdef PG_FLUSH_DATA_WORKS - walkdir(pg_data, pre_sync_fname, false); - if (xlog_is_symlink) - walkdir(pg_wal, pre_sync_fname, false); - walkdir(pg_tblspc, pre_sync_fname, true); + walkdir(pg_data, pre_sync_fname, false); + if (xlog_is_symlink) + walkdir(pg_wal, pre_sync_fname, false); + walkdir(pg_tblspc, pre_sync_fname, true); #endif - /* - * Now we do the fsync()s in the same order. - * - * The main call ignores symlinks, so in addition to specially processing - * pg_wal if it's a symlink, pg_tblspc has to be visited separately with - * process_symlinks = true. Note that if there are any plain directories - * in pg_tblspc, they'll get fsync'd twice. That's not an expected case - * so we don't worry about optimizing it. - */ - walkdir(pg_data, fsync_fname, false); - if (xlog_is_symlink) - walkdir(pg_wal, fsync_fname, false); - walkdir(pg_tblspc, fsync_fname, true); + /* + * Now we do the fsync()s in the same order. + * + * The main call ignores symlinks, so in addition to specially + * processing pg_wal if it's a symlink, pg_tblspc has to be + * visited separately with process_symlinks = true. Note that + * if there are any plain directories in pg_tblspc, they'll + * get fsync'd twice. That's not an expected case so we don't + * worry about optimizing it. + */ + walkdir(pg_data, fsync_fname, false); + if (xlog_is_symlink) + walkdir(pg_wal, fsync_fname, false); + walkdir(pg_tblspc, fsync_fname, true); + } + break; + } } /* @@ -121,17 +210,40 @@ fsync_pgdata(const char *pg_data, * This is a convenient wrapper on top of walkdir(). */ void -fsync_dir_recurse(const char *dir) +fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method) { - /* - * If possible, hint to the kernel that we're soon going to fsync the data - * directory and its contents. - */ + switch (sync_method) + { + case DATA_DIR_SYNC_METHOD_SYNCFS: + { +#ifndef HAVE_SYNCFS + pg_log_error("this build does not support sync method \"%s\"", + "syncfs"); + exit(EXIT_FAILURE); +#else + /* + * On Linux, we don't have to open every single file one by + * one. We can use syncfs() to sync the whole filesystem. + */ + do_syncfs(dir); +#endif /* HAVE_SYNCFS */ + } + break; + + case DATA_DIR_SYNC_METHOD_FSYNC: + { + /* + * If possible, hint to the kernel that we're soon going to + * fsync the data directory and its contents. + */ #ifdef PG_FLUSH_DATA_WORKS - walkdir(dir, pre_sync_fname, false); + walkdir(dir, pre_sync_fname, false); #endif - walkdir(dir, fsync_fname, false); + walkdir(dir, fsync_fname, false); + } + break; + } } /* diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h index 7da21f15e6..cae6159bf6 100644 --- a/src/include/common/file_utils.h +++ b/src/include/common/file_utils.h @@ -34,8 +34,9 @@ struct iovec; /* avoid including port/pg_iovec.h here */ #ifdef FRONTEND extern int fsync_fname(const char *fname, bool isdir); -extern void fsync_pgdata(const char *pg_data, int serverVersion); -extern void fsync_dir_recurse(const char *dir); +extern void fsync_pgdata(const char *pg_data, int serverVersion, + DataDirSyncMethod sync_method); +extern void fsync_dir_recurse(const char *dir, DataDirSyncMethod sync_method); extern int durable_rename(const char *oldfile, const char *newfile); extern int fsync_parent_path(const char *fname); #endif -- 2.25.1