diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c
index b7f43d5..20b6227 100644
--- a/src/bin/pg_basebackup/receivelog.c
+++ b/src/bin/pg_basebackup/receivelog.c
@@ -43,6 +43,9 @@ const XLogRecPtr InvalidXLogRecPtr = {0, 0};
 
 /* fd for currently open WAL file */
 static int	walfile = -1;
+static int	fsync_parent_path(const char *fname);
+static int	fsync_fname_ext(const char *fname, bool isdir);
+static int	durable_rename(const char *oldfile, const char *newfile);
 
 static bool
 mark_file_as_archived(const char *basedir, const char *fname)
@@ -61,17 +64,13 @@ mark_file_as_archived(const char *basedir, const char *fname)
 		return false;
 	}
 
-	if (fsync(fd) != 0)
-	{
-		fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
-				progname, tmppath, strerror(errno));
-
-		close(fd);
+	close(fd);
 
+	if (fsync_fname_ext(tmppath, false) != 0)
 		return false;
-	}
 
-	close(fd);
+	if (fsync_parent_path(tmppath) != 0)
+		return false;
 
 	return true;
 }
@@ -109,6 +108,10 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir,
 	/*
 	 * Verify that the file is either empty (just created), or a complete
 	 * XLogSegSize segment. Anything in between indicates a corrupt file.
+	 *
+	 * XXX: This means that we might not restart if a crash occurs before the
+	 * fsync below. We probably should create the file in a temporary path
+	 * like the backend does...
 	 */
 	if (fstat(f, &statbuf) != 0)
 	{
@@ -119,7 +122,22 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir,
 		return -1;
 	}
 	if (statbuf.st_size == XLogSegSize)
-		return f;				/* File is open and ready to use */
+	{
+		/*
+		 * fsync, in case of a previous crash between padding and fsyncing the
+		 * file.  On failure, close the descriptor so it is not leaked; the
+		 * fsync helpers have already reported the error.
+		 */
+		if (fsync_fname_ext(fn, false) != 0 ||
+			fsync_parent_path(fn) != 0)
+		{
+			close(f);
+			return -1;
+		}
+
+		/* File is open and ready to use */
+		return f;
+	}
 	if (statbuf.st_size != 0)
 	{
 		fprintf(stderr,
@@ -146,6 +164,19 @@ open_walfile(XLogRecPtr startpoint, uint32 timeline, char *basedir,
 	}
 	free(zerobuf);
 
+	/*
+	 * fsync WAL file and containing directory, to ensure the file is
+	 * persistently created and zeroed.  This function returns a file
+	 * descriptor, so failure must be reported as -1 (not "false", which is 0
+	 * and would look like a valid descriptor); close f to avoid leaking it.
+	 */
+	if (fsync_fname_ext(fn, false) != 0 ||
+		fsync_parent_path(fn) != 0)
+	{
+		close(f);
+		return -1;
+	}
+
 	if (lseek(f, SEEK_SET, 0) != 0)
 	{
 		fprintf(stderr,
@@ -205,10 +236,9 @@ close_walfile(char *basedir, char *walname, bool segment_complete,
 
 		snprintf(oldfn, sizeof(oldfn), "%s/%s.partial", basedir, walname);
 		snprintf(newfn, sizeof(newfn), "%s/%s", basedir, walname);
-		if (rename(oldfn, newfn) != 0)
+		if (durable_rename(oldfn, newfn) != 0)
 		{
-			fprintf(stderr, _("%s: could not rename file \"%s\": %s\n"),
-					progname, walname, strerror(errno));
+			/* durable_rename produced a log entry */
 			return false;
 		}
 	}
@@ -304,6 +334,132 @@ localTimestampDifferenceExceeds(TimestampTz start_time,
 }
 
 /*
+ * fsync_fname_ext -- Try to fsync a file or directory
+ *
+ * Returns 0 if the operation succeeded, -1 otherwise.
+ *
+ * XXX: This is a near-duplicate of initdb.c's fsync_fname_ext(); they should
+ * be unified into a common place.
+ */
+static int
+fsync_fname_ext(const char *fname, bool isdir)
+{
+	int			fd;
+	int			flags;
+	int			returncode;
+
+	/*
+	 * Some OSs require directories to be opened read-only whereas other
+	 * systems don't allow us to fsync files opened read-only; so we need both
+	 * cases here. Using O_RDWR will cause us to fail to fsync files that are
+	 * not writable by our userid, but we assume that's OK.
+	 */
+	flags = PG_BINARY;
+	if (!isdir)
+		flags |= O_RDWR;
+	else
+		flags |= O_RDONLY;
+
+	/*
+	 * Open the file, silently ignoring errors about unreadable files (or
+	 * unsupported operations, e.g. opening a directory under Windows), and
+	 * logging others.
+	 */
+	fd = open(fname, flags);
+	if (fd < 0)
+	{
+		if (isdir && (errno == EISDIR || errno == EACCES))
+			return 0;
+		fprintf(stderr, _("%s: could not open file \"%s\": %s\n"),
+				progname, fname, strerror(errno));
+		return -1;
+	}
+
+	returncode = fsync(fd);
+
+	/*
+	 * Some OSes don't allow us to fsync directories at all, so we can ignore
+	 * those errors. Anything else needs to be reported.
+	 */
+	if (returncode != 0 && !(isdir && errno == EBADF))
+	{
+		fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"),
+				progname, fname, strerror(errno));
+		close(fd);
+		return -1;
+	}
+
+	close(fd);
+	return 0;
+}
+
+/*
+ * fsync_parent_path -- fsync the parent path of a file or directory
+ *
+ * This is aimed at making file operations persistent on disk in case of
+ * an OS crash or power failure.
+ */
+static int
+fsync_parent_path(const char *fname)
+{
+	char		parentpath[MAXPGPATH];
+
+	strlcpy(parentpath, fname, MAXPGPATH);
+	get_parent_directory(parentpath);
+
+	/*
+	 * get_parent_directory() returns an empty string if the input argument is
+	 * just a file name (see comments in path.c), so handle that as being the
+	 * current directory.
+	 */
+	if (strlen(parentpath) == 0)
+		strlcpy(parentpath, ".", MAXPGPATH);
+
+	if (fsync_fname_ext(parentpath, true) != 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability
+ *
+ * Wrapper around rename, similar to the backend version. Note that this
+ * version does not fsync the target file before the rename, as it's unlikely
+ * to be helpful for current and prospective users.
+ */
+static int
+durable_rename(const char *oldfile, const char *newfile)
+{
+	/*
+	 * First fsync the old path, to ensure that it is properly persistent on
+	 * disk.
+	 */
+	if (fsync_fname_ext(oldfile, false) != 0)
+		return -1;
+
+	/* Time to do the real deal... */
+	if (rename(oldfile, newfile) != 0)
+	{
+		fprintf(stderr, _("%s: could not rename file \"%s\" to \"%s\": %s\n"),
+				progname, oldfile, newfile, strerror(errno));
+		return -1;
+	}
+
+	/*
+	 * To guarantee renaming the file is persistent, fsync the file with its
+	 * new name, and its containing directory.
+	 */
+	if (fsync_fname_ext(newfile, false) != 0)
+		return -1;
+
+	if (fsync_parent_path(newfile) != 0)
+		return -1;
+
+	return 0;
+}
+
+/*
  * Receive a log stream starting at the specified position.
  *
  * If sysidentifier is specified, validate that both the system