From 3e1801d2ffb1fdcb161b41590c29695488195b20 Mon Sep 17 00:00:00 2001 From: Jerry Jelinek Date: Tue, 5 Feb 2019 14:23:02 +0000 Subject: [PATCH] cow filesystem --- doc/src/sgml/config.sgml | 18 +++++ src/backend/access/transam/xlog.c | 95 ++++++++++++++++----------- src/backend/utils/misc/guc.c | 10 +++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/access/xlog.h | 1 + 5 files changed, 88 insertions(+), 37 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 9b7a7388d5..981d39fba3 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3562,6 +3562,24 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows + + wal_cow_fs (boolean) + + wal_cow_fs configuration parameter + + + + + This parameter should only be set to on when the WAL + resides on a Copy-On-Write filesystem. Actions which do not make sense + on a COW filesystem are skipped. For example, recycling WAL files and + zero-filling new WAL files both impose a performance penalty on a COW + filesystem. This setting is only appropriate for filesystems which + allocate new disk blocks on every write. 
+ + + + wal_sender_timeout (integer) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index a9f3272849..9b6632af0d 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -94,6 +94,7 @@ bool wal_log_hints = false; bool wal_compression = false; char *wal_consistency_checking_string = NULL; bool *wal_consistency_checking = NULL; +bool wal_cow_fs = false; bool log_checkpoints = false; int sync_method = DEFAULT_SYNC_METHOD; int wal_level = WAL_LEVEL_MINIMAL; @@ -3216,6 +3217,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) XLogSegNo max_segno; int fd; int nbytes; + bool fail = false; XLogFilePath(path, ThisTimeLineID, logsegno, wal_segment_size); @@ -3255,39 +3257,56 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock) (errcode_for_file_access(), errmsg("could not create file \"%s\": %m", tmppath))); - /* - * Zero-fill the file. We have to do this the hard way to ensure that all - * the file space has really been allocated --- on platforms that allow - * "holes" in files, just seeking to the end doesn't allocate intermediate - * space. This way, we know that we have all the space and (after the - * fsync below) that all the indirect blocks are down on disk. Therefore, - * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the - * log file. - */ memset(zbuffer.data, 0, XLOG_BLCKSZ); - for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ) - { + + if (!wal_cow_fs) { + /* + * Zero-fill the file. We have to do this the hard way to ensure that + * all the file space has really been allocated --- on platforms that + * allow "holes" in files, just seeking to the end doesn't allocate + * intermediate space. This way, we know that we have all the space + * and (after the fsync below) that all the indirect blocks are down on + * disk. Therefore, fdatasync(2) or O_DSYNC will be sufficient to sync + * future writes to the log file. 
+ */ + for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ) + { + errno = 0; + pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE); + if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ) + fail = true; + pgstat_report_wait_end(); + if (fail) + break; + } + } + else + { errno = 0; pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE); - if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ) - { - int save_errno = errno; + if (lseek(fd, wal_segment_size - 1, SEEK_SET) == -1 || + (int) write(fd, zbuffer.data, 1) != (int) 1) + fail = true; + pgstat_report_wait_end(); + } - /* - * If we fail to make the file, delete it to release disk space - */ - unlink(tmppath); + if (fail) + { + int save_errno = errno; - close(fd); + /* + * If we fail to make the file, delete it to release disk space + */ + unlink(tmppath); - /* if write didn't set errno, assume problem is no disk space */ - errno = save_errno ? save_errno : ENOSPC; + close(fd); - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write to file \"%s\": %m", tmppath))); - } - pgstat_report_wait_end(); + /* if write didn't set errno, assume problem is no disk space */ + errno = save_errno ? save_errno : ENOSPC; + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write to file \"%s\": %m", tmppath))); } pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC); @@ -4050,17 +4069,19 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr) char newpath[MAXPGPATH]; #endif struct stat statbuf; - XLogSegNo endlogSegNo; - XLogSegNo recycleSegNo; + XLogSegNo endlogSegNo = 0; + XLogSegNo recycleSegNo = 0; - /* - * Initialize info about where to try to recycle to. - */ - XLByteToSeg(endptr, endlogSegNo, wal_segment_size); - if (RedoRecPtr == InvalidXLogRecPtr) - recycleSegNo = endlogSegNo + 10; - else - recycleSegNo = XLOGfileslop(RedoRecPtr); + if (!wal_cow_fs) { + /* + * Initialize info about where to try to recycle to. 
+ */ + XLByteToSeg(endptr, endlogSegNo, wal_segment_size); + if (RedoRecPtr == InvalidXLogRecPtr) + recycleSegNo = endlogSegNo + 10; + else + recycleSegNo = XLOGfileslop(RedoRecPtr); + } snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname); @@ -4069,7 +4090,7 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr) * segment. Only recycle normal files, pg_standby for example can create * symbolic links pointing to a separate archive directory. */ - if (endlogSegNo <= recycleSegNo && + if (!wal_cow_fs && endlogSegNo <= recycleSegNo && lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) && InstallXLogFileSegment(&endlogSegNo, path, true, recycleSegNo, true)) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 8681ada33a..a1918f57aa 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1176,6 +1176,16 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"wal_cow_fs", PGC_SUSET, WAL_SETTINGS, + gettext_noop("WAL on Copy-On-Write file system."), + NULL + }, + &wal_cow_fs, + false, + NULL, NULL, NULL + }, + { {"log_checkpoints", PGC_SIGHUP, LOGGING_WHAT, gettext_noop("Logs each checkpoint."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index c7f53470df..fcdc5fc99f 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -206,6 +206,7 @@ #wal_compression = off # enable compression of full-page writes #wal_log_hints = off # also do full page writes of non-critical updates # (change requires restart) +#wal_cow_fs = off # set to on if WAL resides on a Copy-On-Write file system #wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers # (change requires restart) #wal_writer_delay = 200ms # 1-10000 milliseconds diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index f90a6a9139..a1c0ece8ed 100644 --- a/src/include/access/xlog.h +++ 
b/src/include/access/xlog.h @@ -116,6 +116,7 @@ extern bool EnableHotStandby; extern bool fullPageWrites; extern bool wal_log_hints; extern bool wal_compression; +extern bool wal_cow_fs; extern bool *wal_consistency_checking; extern char *wal_consistency_checking_string; extern bool log_checkpoints; -- 2.15.1