From fe71350b0874adbec97c612e7b80e7179c6c48e9 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Tue, 31 Jan 2023 20:01:49 +1300 Subject: [PATCH 1/2] Lock pg_control while reading or writing. Front-end programs that read pg_control and user-facing functions run in the backend read pg_control without acquiring ControlFileLock. If you're unlucky enough to read() while the backend is in write(), on at least Linux ext4 you might see partial data. Use a POSIX advisory lock to avoid this problem. --- src/common/controldata_utils.c | 69 ++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/src/common/controldata_utils.c b/src/common/controldata_utils.c index 9723587466..284895bdd9 100644 --- a/src/common/controldata_utils.c +++ b/src/common/controldata_utils.c @@ -39,6 +39,42 @@ #include "storage/fd.h" #endif +/* + * Advisory-lock the control file until closed. + */ +static int +lock_file(int fd, bool exclusive) +{ +#ifdef WIN32 + /* + * LockFile() might work, but it seems dangerous because there are reports + * that the lock can sometimes linger if a program crashes without + * unlocking. That would prevent recovery after a PANIC, so we'd better + * not use it without more research. Because of the lack of portability, + * this function is kept private to controldata_utils.c. + */ + return 0; +#else + struct flock lock; + int rc; + + memset(&lock, 0, sizeof(lock)); + lock.l_type = exclusive ? F_WRLCK : F_RDLCK; + lock.l_start = 0; + lock.l_whence = SEEK_SET; + lock.l_len = 0; + lock.l_pid = -1; + + do + { + rc = fcntl(fd, F_SETLKW, &lock); + } + while (rc < 0 && errno == EINTR); + + return rc; +#endif +} + /* * get_controlfile() * @@ -74,6 +110,20 @@ get_controlfile(const char *DataDir, bool *crc_ok_p) ControlFilePath); #endif + /* Make sure we can read the file atomically. */ + if (lock_file(fd, false) < 0) + { +#ifndef FRONTEND + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not lock file \"%s\" for reading: %m", + ControlFilePath))); +#else + pg_fatal("could not lock file \"%s\" for reading: %m", + ControlFilePath); +#endif + } + r = read(fd, ControlFile, sizeof(ControlFileData)); if (r != sizeof(ControlFileData)) { @@ -186,6 +236,25 @@ update_controlfile(const char *DataDir, pg_fatal("could not open file \"%s\": %m", ControlFilePath); #endif + /* + * Make sure that any concurrent reader (including frontend programs) can + * read the file atomically. Note that this refers to atomicity of + * concurrent reads and writes. For our assumption of atomicity under + * power failure, see PG_CONTROL_MAX_SAFE_SIZE. + */ + if (lock_file(fd, true) < 0) + { +#ifndef FRONTEND + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not lock file \"%s\" for writing: %m", + ControlFilePath))); +#else + pg_fatal("could not lock file \"%s\" for writing: %m", + ControlFilePath); +#endif + } + errno = 0; #ifndef FRONTEND pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE); -- 2.35.1