From bd98ac1a2e98bc67238103b0f2764cf7fe0edc58 Mon Sep 17 00:00:00 2001
From: Asim R P
Date: Thu, 12 Sep 2019 17:17:29 +0530
Subject: [PATCH v1 1/2] Create restartpoint when replaying drop database

Replaying a drop database record removes the database directories.
Unlike pages, directories have no mechanism similar to invalid page
detection during WAL replay.  If, after a crash, WAL replay has to
resume from an earlier checkpoint, we should avoid replaying a second
time the WAL records that precede the drop database record, so request
a restartpoint while replaying it.

To support this, a drop database record now lists every tablespace
whose database directory is removed, instead of one record being
emitted per tablespace.

Proposed by Paul Guo.
---
Review fixes folded into this revision:
 - register the WAL record itself (xlrec), not the address of the local
   pointer variable (&xlrec), in movedb() and remove_dbtablespaces()
 - dbase_desc() prints tablespace/database again and stays on one line
 - tablespace_ids[] is declared with FLEXIBLE_ARRAY_MEMBER; record sizes
   are computed with offsetof() and the record is palloc0'd so padding
   bytes are initialized
 - remove_dbtablespaces() emits no record when no directory was found
 - declarations moved to block starts; per-iteration paths and the
   temporary lists are freed
 - dropped the unrelated blank-line removal in src/include/access/xlog.h
The index lines below still carry the hashes of the original submission.

 src/backend/access/rmgrdesc/dbasedesc.c |   8 ++-
 src/backend/commands/dbcommands.c       | 103 ++++++++++++++++++++++----------
 src/include/commands/dbcommands_xlog.h  |   5 +-
 3 files changed, 83 insertions(+), 33 deletions(-)

diff --git a/src/backend/access/rmgrdesc/dbasedesc.c b/src/backend/access/rmgrdesc/dbasedesc.c
index c7d60ce10d..b97bac2411 100644
--- a/src/backend/access/rmgrdesc/dbasedesc.c
+++ b/src/backend/access/rmgrdesc/dbasedesc.c
@@ -36,8 +36,12 @@ dbase_desc(StringInfo buf, XLogReaderState *record)
 	{
 		xl_dbase_drop_rec *xlrec = (xl_dbase_drop_rec *) rec;
+		int			i;
 
-		appendStringInfo(buf, "dir %u/%u",
-						 xlrec->tablespace_id, xlrec->db_id);
+		/* One "tablespace/database" pair per directory being dropped */
+		appendStringInfoString(buf, "dir");
+		for (i = 0; i < xlrec->nspcids; i++)
+			appendStringInfo(buf, " %u/%u",
+							 xlrec->tablespace_ids[i], xlrec->db_id);
 	}
 }
 
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 95881a8550..c0c8726698 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1400,16 +1400,20 @@ movedb(const char *dbname, const char *tblspcname)
 	 * Record the filesystem change in XLOG
 	 */
 	{
-		xl_dbase_drop_rec xlrec;
+		/* Header plus exactly one tablespace OID; zeroed so padding is defined */
+		Size		xlrec_size = offsetof(xl_dbase_drop_rec, tablespace_ids) + sizeof(Oid);
+		xl_dbase_drop_rec *xlrec = palloc0(xlrec_size);
 
-		xlrec.db_id = db_id;
-		xlrec.tablespace_id = src_tblspcoid;
+		xlrec->db_id = db_id;
+		xlrec->nspcids = 1;
+		xlrec->tablespace_ids[0] = src_tblspcoid;
 
 		XLogBeginInsert();
-		XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
+		XLogRegisterData((char *) xlrec, xlrec_size);
 
 		(void) XLogInsert(RM_DBASE_ID,
 						  XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
+		pfree(xlrec);
 	}
 
 	/* Now it's safe to release the database lock */
@@ -1914,6 +1918,13 @@ remove_dbtablespaces(Oid db_id)
 	Relation	rel;
 	TableScanDesc scan;
 	HeapTuple	tuple;
+	List	   *dstpaths = NIL;
+	List	   *spcoids = NIL;
+	ListCell   *cell1;
+	ListCell   *cell2;
+	xl_dbase_drop_rec *xlrec;
+	Size		xlrec_size;
+	int			i = 0;
 
 	rel = table_open(TableSpaceRelationId, AccessShareLock);
 	scan = table_beginscan_catalog(rel, 0, NULL);
@@ -1936,31 +1947,51 @@ remove_dbtablespaces(Oid db_id)
 			pfree(dstpath);
 			continue;
 		}
 
+		/* Remember this directory; removal and WAL-logging happen below */
+		dstpaths = lappend(dstpaths, dstpath);
+		spcoids = lappend_oid(spcoids, dsttablespace);
+	}
+
+	table_endscan(scan);
+	table_close(rel, AccessShareLock);
+
+	/* No directory was found for this database: no WAL record is needed */
+	if (spcoids == NIL)
+		return;
+
+	/*
+	 * Build a single record naming every tablespace directory being removed.
+	 * palloc0 keeps the struct's padding bytes initialized in WAL.
+	 */
+	xlrec_size = offsetof(xl_dbase_drop_rec, tablespace_ids) +
+		sizeof(Oid) * list_length(spcoids);
+	xlrec = palloc0(xlrec_size);
+	xlrec->db_id = db_id;
+	xlrec->nspcids = list_length(spcoids);
+
+	forboth(cell1, dstpaths, cell2, spcoids)
+	{
+		char	   *path = lfirst(cell1);
+
-		if (!rmtree(dstpath, true))
+		if (!rmtree(path, true))
 			ereport(WARNING,
 					(errmsg("some useless files may be left behind in old database directory \"%s\"",
-							dstpath)));
+							path)));
+		pfree(path);
 
-		/* Record the filesystem change in XLOG */
-		{
-			xl_dbase_drop_rec xlrec;
-
-			xlrec.db_id = db_id;
-			xlrec.tablespace_id = dsttablespace;
-
-			XLogBeginInsert();
-			XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_drop_rec));
-
-			(void) XLogInsert(RM_DBASE_ID,
-							  XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
-		}
-
-		pfree(dstpath);
+		xlrec->tablespace_ids[i++] = lfirst_oid(cell2);
 	}
 
-	table_endscan(scan);
-	table_close(rel, AccessShareLock);
+	/* Record the filesystem change in XLOG */
+	XLogBeginInsert();
+	XLogRegisterData((char *) xlrec, xlrec_size);
+	(void) XLogInsert(RM_DBASE_ID,
+					  XLOG_DBASE_DROP | XLR_SPECIAL_REL_UPDATE);
+
+	pfree(xlrec);
+	list_free(dstpaths);
+	list_free(spcoids);
 }
 
 /*
@@ -2166,8 +2197,7 @@ dbase_redo(XLogReaderState *record)
 		xl_dbase_drop_rec *xlrec = (xl_dbase_drop_rec *) XLogRecGetData(record);
 		char	   *dst_path;
+		int			i;
 
-		dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_id);
-
 		if (InHotStandby)
 		{
 			/*
@@ -2196,11 +2226,26 @@ dbase_redo(XLogReaderState *record)
 		/* Clean out the xlog relcache too */
 		XLogDropDatabase(xlrec->db_id);
 
+		/*
+		 * If we crash after removing directories, we should avoid replaying
+		 * WAL records prior to the current WAL record (drop database).
+		 * Creating a restartpoint ensures that recovery will start from at
+		 * least this point onwards in the event of a crash / immediate
+		 * shutdown.
+		 */
+		RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE |
+						  CHECKPOINT_WAIT);
+
 		/* And remove the physical files */
-		if (!rmtree(dst_path, true))
-			ereport(WARNING,
-					(errmsg("some useless files may be left behind in old database directory \"%s\"",
-							dst_path)));
+		for (i = 0; i < xlrec->nspcids; i++)
+		{
+			dst_path = GetDatabasePath(xlrec->db_id, xlrec->tablespace_ids[i]);
+			if (!rmtree(dst_path, true))
+				ereport(WARNING,
+						(errmsg("some useless files may be left behind in old database directory \"%s\"",
+								dst_path)));
+			pfree(dst_path);
+		}
 
 		if (InHotStandby)
 		{
diff --git a/src/include/commands/dbcommands_xlog.h b/src/include/commands/dbcommands_xlog.h
index 46be8a615a..a1654f25e0 100644
--- a/src/include/commands/dbcommands_xlog.h
+++ b/src/include/commands/dbcommands_xlog.h
@@ -34,7 +34,8 @@ typedef struct xl_dbase_drop_rec
 {
-	/* Records dropping of a single subdirectory incl. contents */
+	/* Records dropping of one database directory per listed tablespace */
 	Oid			db_id;
-	Oid			tablespace_id;
+	uint16		nspcids;		/* number of entries in tablespace_ids[] */
+	Oid			tablespace_ids[FLEXIBLE_ARRAY_MEMBER];
 } xl_dbase_drop_rec;
 
 extern void dbase_redo(XLogReaderState *rptr);
-- 
2.14.3 (Apple Git-98)