From a12c061e612a898c30afeac247155f4fa4150930 Mon Sep 17 00:00:00 2001
From: Andres Freund
Date: Mon, 21 Feb 2022 15:41:23 -0800
Subject: [PATCH v3 4/4] WIP: AssertFileNotDeleted(fd).

Author:
Reviewed-By:
Discussion: https://postgr.es/m/
Backpatch:
---
 src/backend/access/transam/slru.c     |  2 +
 src/backend/access/transam/xlog.c     |  2 +
 src/backend/replication/walreceiver.c |  2 +
 src/backend/storage/file/fd.c         | 92 +++++++++++++++++++++++++++
 src/include/storage/fd.h              |  1 +
 5 files changed, 99 insertions(+)

diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index b65cb49d7f..654da64128 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -871,6 +871,8 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata)
 		}
 	}
 
+	AssertFileNotDeleted(fd);
+
 	errno = 0;
 	pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
 	if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 5eabd32cf6..8d0ea6596e 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2188,6 +2188,8 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 				if (track_wal_io_timing)
 					INSTR_TIME_SET_CURRENT(start);
 
+				AssertFileNotDeleted(openLogFile);
+
 				pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
 				written = pg_pwrite(openLogFile, from, nleft, startoffset);
 				pgstat_report_wait_end();
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 3c9411e221..37d92c1aba 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -912,6 +912,8 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli)
 		else
 			segbytes = nbytes;
 
+		AssertFileNotDeleted(recvFile);
+
 		/* OK to write the logs */
 		errno = 0;
 
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 24704b6a02..1fc811ec1b 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -93,6 +93,7 @@
 #include "common/file_perm.h"
 #include "common/file_utils.h"
 #include "common/pg_prng.h"
+#include "common/string.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "port/pg_iovec.h"
@@ -2073,6 +2074,8 @@ FilePrefetch(File file, off_t offset, int amount, uint32 wait_event_info)
 	if (returnCode < 0)
 		return returnCode;
 
+	AssertFileNotDeleted(VfdCache[file].fd);
+
 	pgstat_report_wait_start(wait_event_info);
 	returnCode = posix_fadvise(VfdCache[file].fd, offset, amount,
 							   POSIX_FADV_WILLNEED);
@@ -2103,6 +2106,11 @@ FileWriteback(File file, off_t offset, off_t nbytes, uint32 wait_event_info)
 	if (returnCode < 0)
 		return;
 
+	/*
+	 * XXX: can't assert non-use of fd right now,
+	 * ScheduleBufferTagForWriteback can end up writing at a later time.
+	 */
+
 	pgstat_report_wait_start(wait_event_info);
 	pg_flush_data(VfdCache[file].fd, offset, nbytes);
 	pgstat_report_wait_end();
@@ -2128,6 +2136,8 @@ FileRead(File file, char *buffer, int amount, off_t offset,
 
 	vfdP = &VfdCache[file];
 
+	AssertFileNotDeleted(vfdP->fd);
+
 retry:
 	pgstat_report_wait_start(wait_event_info);
 	returnCode = pg_pread(vfdP->fd, buffer, amount, offset);
@@ -2184,6 +2194,8 @@ FileWrite(File file, char *buffer, int amount, off_t offset,
 
 	vfdP = &VfdCache[file];
 
+	AssertFileNotDeleted(vfdP->fd);
+
 	/*
 	 * If enforcing temp_file_limit and it's a temp file, check to see if the
 	 * write would overrun temp_file_limit, and throw error if so.  Note: it's
@@ -2276,6 +2288,8 @@ FileSync(File file, uint32 wait_event_info)
 	if (returnCode < 0)
 		return returnCode;
 
+	AssertFileNotDeleted(VfdCache[file].fd);
+
 	pgstat_report_wait_start(wait_event_info);
 	returnCode = pg_fsync(VfdCache[file].fd);
 	pgstat_report_wait_end();
@@ -2297,6 +2311,8 @@ FileSize(File file)
 			return (off_t) -1;
 	}
 
+	AssertFileNotDeleted(VfdCache[file].fd);
+
 	return lseek(VfdCache[file].fd, 0, SEEK_END);
 }
 
@@ -2314,6 +2330,8 @@ FileTruncate(File file, off_t offset, uint32 wait_event_info)
 	if (returnCode < 0)
 		return returnCode;
 
+	AssertFileNotDeleted(VfdCache[file].fd);
+
 	pgstat_report_wait_start(wait_event_info);
 	returnCode = ftruncate(VfdCache[file].fd, offset);
 	pgstat_report_wait_end();
@@ -3828,6 +3846,80 @@ data_sync_elevel(int elevel)
 	return data_sync_retry ? elevel : PANIC;
 }
 
+/*
+ * AssertFileNotDeleted
+ *
+ * Debugging aid: PANIC if the file underlying kernel file descriptor 'fd' has
+ * been unlinked, return quietly if it still has at least one link.
+ *
+ * Intended to be called right before performing IO on 'fd'.  See the
+ * comments in the body for the limitations of the check.
+ */
+void
+AssertFileNotDeleted(int fd)
+{
+	struct stat statbuf;
+	char		deleted_filename[MAXPGPATH];
+	bool		have_filename = false;
+
+	/*
+	 * fstat shouldn't fail, so it seems ok to error out, even if it's just a
+	 * debugging aid.
+	 *
+	 * XXX: Figure out which operating systems this works on.
+	 */
+	if (fstat(fd, &statbuf) != 0)
+		elog(ERROR, "fstat failed: %m");
+
+	/*
+	 * On several operating systems st_nlink == 0 indicates that the file has
+	 * been deleted. On some OS/filesystem combinations a deleted file may
+	 * still show up with nlink > 0, but nlink == 0 shouldn't be returned
+	 * spuriously. Hardlinks obviously can prevent this from working, but we
+	 * don't expect any, so that's fine.
+	 */
+	if (statbuf.st_nlink > 0)
+		return;
+
+#if defined(__linux__)
+	{
+		char		path[MAXPGPATH];
+		const char *const deleted_suffix = " (deleted)";
+		ssize_t		len;
+
+		/*
+		 * On linux we can figure out what the file name was, by reading the
+		 * /proc/self/fd/N symlink.
+		 */
+		snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
+		len = readlink(path, deleted_filename, sizeof(deleted_filename) - 1);
+
+		/*
+		 * The name only makes the report below more useful.  If it can't be
+		 * determined (e.g. /proc isn't mounted), report without it, rather
+		 * than PANICing with a readlink error that hides the real problem.
+		 */
+		if (len >= 0)
+		{
+			/* readlink doesn't null terminate */
+			deleted_filename[len] = '\0';
+			have_filename = true;
+
+			/* chop off the " (deleted)" */
+			if (pg_str_endswith(deleted_filename, deleted_suffix))
+				deleted_filename[(size_t) len - strlen(deleted_suffix)] = '\0';
+		}
+	}
+#endif
+
+	if (have_filename)
+		elog(PANIC, "file descriptor %d for file %s is of a deleted file",
+			 fd, deleted_filename);
+	else
+		elog(PANIC, "file descriptor %d is of a deleted file",
+			 fd);
+}
+
 /*
  * A convenience wrapper for pg_pwritev() that retries on partial write.  If an
  * error is returned, it is unspecified how much has been written.
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 69549b000f..31513a965c 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -191,6 +191,7 @@ extern int	durable_rename_excl(const char *oldfile, const char *newfile, int log
 extern void SyncDataDirectory(void);
 extern int	data_sync_elevel(int elevel);
 
+extern void AssertFileNotDeleted(int fd);
 /* Filename components */
 #define PG_TEMP_FILES_DIR "pgsql_tmp"
 #define PG_TEMP_FILE_PREFIX "pgsql_tmp"
-- 
2.35.1