From 3781795f9b4e448df6bdd24d5cd7c0743b5e2944 Mon Sep 17 00:00:00 2001 From: Nathan Bossart Date: Wed, 10 Nov 2021 18:35:14 +0000 Subject: [PATCH v8 1/2] Move WAL segment creation logic to its own function. --- src/backend/access/transam/xlog.c | 103 +-------------------------- src/backend/storage/file/fd.c | 114 ++++++++++++++++++++++++++++++ src/include/storage/fd.h | 1 + 3 files changed, 116 insertions(+), 102 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index a7814d4019..87d71e2008 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2918,11 +2918,9 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path) { char tmppath[MAXPGPATH]; - PGAlignedXLogBlock zbuffer; XLogSegNo installed_segno; XLogSegNo max_segno; int fd; - int save_errno; Assert(logtli != 0); @@ -2952,106 +2950,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, elog(DEBUG2, "creating and filling new WAL file"); snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid()); - - unlink(tmppath); - - /* do not use get_sync_bit() here --- want to fsync only at end of fill */ - fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY); - if (fd < 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not create file \"%s\": %m", tmppath))); - - memset(zbuffer.data, 0, XLOG_BLCKSZ); - - pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE); - save_errno = 0; - if (wal_init_zero) - { - struct iovec iov[PG_IOV_MAX]; - int blocks; - - /* - * Zero-fill the file. With this setting, we do this the hard way to - * ensure that all the file space has really been allocated. On - * platforms that allow "holes" in files, just seeking to the end - * doesn't allocate intermediate space. This way, we know that we - * have all the space and (after the fsync below) that all the - * indirect blocks are down on disk. Therefore, fdatasync(2) or - * O_DSYNC will be sufficient to sync future writes to the log file. - */ - - /* Prepare to write out a lot of copies of our zero buffer at once. */ - for (int i = 0; i < lengthof(iov); ++i) - { - iov[i].iov_base = zbuffer.data; - iov[i].iov_len = XLOG_BLCKSZ; - } - - /* Loop, writing as many blocks as we can for each system call. */ - blocks = wal_segment_size / XLOG_BLCKSZ; - for (int i = 0; i < blocks;) - { - int iovcnt = Min(blocks - i, lengthof(iov)); - off_t offset = i * XLOG_BLCKSZ; - - if (pg_pwritev_with_retry(fd, iov, iovcnt, offset) < 0) - { - save_errno = errno; - break; - } - - i += iovcnt; - } - } - else - { - /* - * Otherwise, seeking to the end and writing a solitary byte is - * enough. - */ - errno = 0; - if (pg_pwrite(fd, zbuffer.data, 1, wal_segment_size - 1) != 1) - { - /* if write didn't set errno, assume no disk space */ - save_errno = errno ? errno : ENOSPC; - } - } - pgstat_report_wait_end(); - - if (save_errno) - { - /* - * If we fail to make the file, delete it to release disk space - */ - unlink(tmppath); - - close(fd); - - errno = save_errno; - - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", tmppath))); - } - - pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC); - if (pg_fsync(fd) != 0) - { - int save_errno = errno; - - close(fd); - errno = save_errno; - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not fsync file \"%s\": %m", tmppath))); - } - pgstat_report_wait_end(); - - if (close(fd) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not close file \"%s\": %m", tmppath))); + CreateEmptyWalSegment(tmppath); /* * Now move the segment into place with its final name. Cope with diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 14b77f2861..4efc46460e 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -3891,3 +3891,117 @@ pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset) return sum; } + +/* + * CreateEmptyWalSegment + * + * Create a new file that can be used as a new WAL segment. The caller is + * responsible for installing the new file in pg_wal. + */ +void +CreateEmptyWalSegment(const char *path) +{ + PGAlignedXLogBlock zbuffer; + int fd; + int save_errno; + + unlink(path); + + /* do not use get_sync_bit() here --- want to fsync only at end of fill */ + fd = BasicOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY); + if (fd < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create file \"%s\": %m", path))); + + memset(zbuffer.data, 0, XLOG_BLCKSZ); + + pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE); + save_errno = 0; + if (wal_init_zero) + { + struct iovec iov[PG_IOV_MAX]; + int blocks; + + /* + * Zero-fill the file. With this setting, we do this the hard way to + * ensure that all the file space has really been allocated. On + * platforms that allow "holes" in files, just seeking to the end + * doesn't allocate intermediate space. This way, we know that we + * have all the space and (after the fsync below) that all the + * indirect blocks are down on disk. Therefore, fdatasync(2) or + * O_DSYNC will be sufficient to sync future writes to the log file. + */ + + /* Prepare to write out a lot of copies of our zero buffer at once. */ + for (int i = 0; i < lengthof(iov); ++i) + { + iov[i].iov_base = zbuffer.data; + iov[i].iov_len = XLOG_BLCKSZ; + } + + /* Loop, writing as many blocks as we can for each system call. */ + blocks = wal_segment_size / XLOG_BLCKSZ; + for (int i = 0; i < blocks;) + { + int iovcnt = Min(blocks - i, lengthof(iov)); + off_t offset = i * XLOG_BLCKSZ; + + if (pg_pwritev_with_retry(fd, iov, iovcnt, offset) < 0) + { + save_errno = errno; + break; + } + + i += iovcnt; + } + } + else + { + /* + * Otherwise, seeking to the end and writing a solitary byte is + * enough. + */ + errno = 0; + if (pg_pwrite(fd, zbuffer.data, 1, wal_segment_size - 1) != 1) + { + /* if write didn't set errno, assume no disk space */ + save_errno = errno ? errno : ENOSPC; + } + } + pgstat_report_wait_end(); + + if (save_errno) + { + /* + * If we fail to make the file, delete it to release disk space + */ + unlink(path); + + close(fd); + + errno = save_errno; + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", path))); + } + + pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC); + if (pg_fsync(fd) != 0) + { + int save_errno = errno; + + close(fd); + errno = save_errno; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fsync file \"%s\": %m", path))); + } + pgstat_report_wait_end(); + + if (close(fd) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not close file \"%s\": %m", path))); +} diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 69549b000f..6bb9e3525b 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -190,6 +190,7 @@ extern int durable_unlink(const char *fname, int loglevel); extern int durable_rename_excl(const char *oldfile, const char *newfile, int loglevel); extern void SyncDataDirectory(void); extern int data_sync_elevel(int elevel); +extern void CreateEmptyWalSegment(const char *path); /* Filename components */ #define PG_TEMP_FILES_DIR "pgsql_tmp" -- 2.25.1