diff --git a/configure.in b/configure.in
new file mode 100644
index 4ea5699..ef66354
*** a/configure.in
--- b/configure.in
*************** AC_CHECK_FUNCS(posix_fadvise)
*** 1243,1248 ****
--- 1243,1250 ----
  AC_CHECK_DECLS(posix_fadvise, [], [], [#include <fcntl.h>])
  fi
  
+ AC_CHECK_FUNCS(posix_fallocate)
+ 
  AC_CHECK_DECLS(fdatasync, [], [], [#include <unistd.h>])
  AC_CHECK_DECLS([strlcat, strlcpy])
  # This is probably only present on Darwin, but may as well check always
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
new file mode 100644
index 76b52fb..82b6360
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
*************** char	   *XLogArchiveCommand = NULL;
*** 79,84 ****
--- 79,85 ----
  bool		EnableHotStandby = false;
  bool		fullPageWrites = true;
  bool		log_checkpoints = false;
+ bool		wal_use_fallocate = false;
  int			sync_method = DEFAULT_SYNC_METHOD;
  int			wal_level = WAL_LEVEL_MINIMAL;
  int			CommitDelay = 0;	/* precommit delay in microseconds */
*************** XLogFileInit(XLogSegNo logsegno, bool *u
*** 2302,2338 ****
  				(errcode_for_file_access(),
  				 errmsg("could not create file \"%s\": %m", tmppath)));
  
! 	/*
! 	 * Zero-fill the file.  We have to do this the hard way to ensure that all
! 	 * the file space has really been allocated --- on platforms that allow
! 	 * "holes" in files, just seeking to the end doesn't allocate intermediate
! 	 * space.  This way, we know that we have all the space and (after the
! 	 * fsync below) that all the indirect blocks are down on disk.  Therefore,
! 	 * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
! 	 * log file.
! 	 */
! 	for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
! 	{
! 		errno = 0;
! 		if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
! 		{
! 			int			save_errno = errno;
! 
! 			/*
! 			 * If we fail to make the file, delete it to release disk space
! 			 */
! 			unlink(tmppath);
! 
! 			close(fd);
! 
! 			/* if write didn't set errno, assume problem is no disk space */
! 			errno = save_errno ? save_errno : ENOSPC;
! 
! 			ereport(ERROR,
! 					(errcode_for_file_access(),
! 					 errmsg("could not write to file \"%s\": %m", tmppath)));
! 		}
! 	}
  	pfree(zbuffer);
  
  	if (pg_fsync(fd) != 0)
--- 2303,2367 ----
  				(errcode_for_file_access(),
  				 errmsg("could not create file \"%s\": %m", tmppath)));
  
! #ifdef USE_POSIX_FALLOCATE
! 	if (wal_use_fallocate)
! 	{
! 		/*
! 		 * Reserve the whole segment in one call.  posix_fallocate() returns
! 		 * an error number rather than setting errno, so keep the result in a
! 		 * local and copy it into errno only when we are ready to report it.
! 		 */
! 		int			rc = posix_fallocate(fd, 0, XLogSegSize);
! 
! 		if (rc != 0)
! 		{
! 			/* If we fail to make the file, delete it to release disk space */
! 			close(fd);
! 			unlink(tmppath);
! 
! 			/* set errno after close/unlink so %m reports the real cause */
! 			errno = rc;
! 
! 			ereport(ERROR,
! 					(errcode_for_file_access(),
! 					 errmsg("could not allocate space for file \"%s\" using posix_fallocate: %m", tmppath)));
! 		}
! 	}
! 	else
! #endif
! 	{
! 		/*
! 		 * Zero-fill the file.  We have to do this the hard way to ensure that
! 		 * all the file space has really been allocated --- on platforms that
! 		 * allow "holes" in files, just seeking to the end doesn't allocate
! 		 * intermediate space.  This way, we know that we have all the space
! 		 * and (after the fsync below) that all the indirect blocks are down
! 		 * on disk.  Therefore, fdatasync(2) or O_DSYNC will be sufficient to
! 		 * sync future writes to the log file.
! 		 */
! 		for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)
! 		{
! 			errno = 0;
! 			if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
! 			{
! 				int			save_errno = errno;
! 
! 				/*
! 				 * If we fail to make the file, delete it to release disk space
! 				 */
! 				unlink(tmppath);
! 
! 				close(fd);
! 
! 				/* if write didn't set errno, assume problem is no disk space */
! 				errno = save_errno ? save_errno : ENOSPC;
! 
! 				ereport(ERROR,
! 						(errcode_for_file_access(),
! 						 errmsg("could not write to file \"%s\": %m", tmppath)));
! 			}
! 		}
! 	}
  	pfree(zbuffer);
  
  	if (pg_fsync(fd) != 0)
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
new file mode 100644
index 22ba35f..821aa6c
*** a/src/backend/utils/misc/guc.c
--- b/src/backend/utils/misc/guc.c
*************** static struct config_bool ConfigureNames
*** 1455,1460 ****
--- 1455,1476 ----
  		NULL, NULL, NULL
  	},
  
+ 	{
+ 		{"wal_use_fallocate",
+ #ifdef USE_POSIX_FALLOCATE
+ 			PGC_SIGHUP,
+ #else
+ 			PGC_INTERNAL,
+ #endif
+ 			WAL_SETTINGS,
+ 			gettext_noop("WAL writer should use posix_fallocate(3) instead of write(2)."),
+ 			NULL,
+ 		},
+ 		&wal_use_fallocate,
+ 		false,
+ 		NULL, NULL, NULL
+ 	},
+ 
  	/* End-of-list marker */
  	{
  		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
new file mode 100644
index f8f06c1..8e98e2c
*** a/src/include/access/xlog.h
--- b/src/include/access/xlog.h
*************** extern char *XLogArchiveCommand;
*** 190,195 ****
--- 190,196 ----
  extern bool EnableHotStandby;
  extern bool fullPageWrites;
  extern bool log_checkpoints;
+ extern bool wal_use_fallocate;
  
  /* WAL levels */
  typedef enum WalLevel
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
new file mode 100644
index 8aabf3c..033127b
*** a/src/include/pg_config.h.in
--- b/src/include/pg_config.h.in
***************
*** 369,374 ****
--- 369,377 ----
  /* Define to 1 if you have the `posix_fadvise' function. */
  #undef HAVE_POSIX_FADVISE
  
+ /* Define to 1 if you have the `posix_fallocate' function. */
+ #undef HAVE_POSIX_FALLOCATE
+ 
  /* Define to 1 if you have the POSIX signal interface. */
  #undef HAVE_POSIX_SIGNALS
  
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
new file mode 100644
index 02bcd92..8815e78
*** a/src/include/pg_config_manual.h
--- b/src/include/pg_config_manual.h
***************
*** 137,142 ****
--- 137,150 ----
  #endif
  
  /*
+  * USE_POSIX_FALLOCATE code should be compiled only if we have access
+  * to posix_fallocate.  If necessary you can remove the #define here.
+  */
+ #ifdef HAVE_POSIX_FALLOCATE
+ #define USE_POSIX_FALLOCATE
+ #endif
+ 
+ /*
   * This is the default directory in which AF_UNIX socket files are
   * placed.  Caution: changing this risks breaking your existing client
   * applications, which are likely to continue to look in the old