From 9cc77f50747db58014dae482fe7a9d6b306b0c0b Mon Sep 17 00:00:00 2001 From: Andrey Borodin Date: Sun, 21 Jan 2018 14:43:50 +0500 Subject: [PATCH 1/3] Delete pages during GiST VACUUM v2 --- src/backend/access/gist/gist.c | 7 +++ src/backend/access/gist/gistbuild.c | 5 -- src/backend/access/gist/gistutil.c | 4 +- src/backend/access/gist/gistvacuum.c | 119 +++++++++++++++++++++++++++++++++-- src/backend/access/gist/gistxlog.c | 47 +++++++++++++- src/include/access/gist_private.h | 23 ++++--- src/include/access/gistxlog.h | 16 ++++- src/test/regress/expected/gist.out | 4 +- src/test/regress/sql/gist.sql | 4 +- 9 files changed, 203 insertions(+), 26 deletions(-) diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 51c32e4afe..f7973a2e15 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -694,6 +694,13 @@ gistdoinsert(Relation r, IndexTuple itup, Size freespace, GISTSTATE *giststate) GISTInsertStack *item; OffsetNumber downlinkoffnum; + if(GistPageIsDeleted(stack->page)) + { + UnlockReleaseBuffer(stack->buffer); + xlocked = false; + state.stack = stack = stack->parent; + continue; + } downlinkoffnum = gistchoose(state.r, stack->page, itup, giststate); iid = PageGetItemId(stack->page, downlinkoffnum); idxtuple = (IndexTuple) PageGetItem(stack->page, iid); diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c index d22318a5f1..abb9a642c6 100644 --- a/src/backend/access/gist/gistbuild.c +++ b/src/backend/access/gist/gistbuild.c @@ -1126,11 +1126,6 @@ gistGetMaxLevel(Relation index) * but will be added there the first time we visit them. */ -typedef struct -{ - BlockNumber childblkno; /* hash key */ - BlockNumber parentblkno; -} ParentMapEntry; static void gistInitParentMap(GISTBuildState *buildstate) diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index 55cccd247a..52c1b6fa69 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -24,6 +24,7 @@ #include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/syscache.h" +#include "utils/snapmgr.h" /* @@ -801,13 +802,14 @@ gistNewBuffer(Relation r) if (ConditionalLockBuffer(buffer)) { Page page = BufferGetPage(buffer); + PageHeader p = (PageHeader) page; if (PageIsNew(page)) return buffer; /* OK to use, if never initialized */ gistcheckpage(r, buffer); - if (GistPageIsDeleted(page)) + if (GistPageIsDeleted(page) && TransactionIdPrecedes(p->pd_prune_xid, RecentGlobalDataXmin)) return buffer; /* OK to use */ LockBuffer(buffer, GIST_UNLOCK); diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c index 22181c6299..235c51c71b 100644 --- a/src/backend/access/gist/gistvacuum.c +++ b/src/backend/access/gist/gistvacuum.c @@ -20,6 +20,8 @@ #include "miscadmin.h" #include "storage/indexfsm.h" #include "storage/lmgr.h" +#include "utils/snapmgr.h" +#include "access/xact.h" /* @@ -126,7 +128,6 @@ pushStackIfSplited(Page page, GistBDItem *stack) } } - /* * Bulk deletion of all index entries pointing to a set of heap tuples and * check invalid tuples left after upgrade. @@ -136,12 +137,14 @@ pushStackIfSplited(Page page, GistBDItem *stack) * Result: a palloc'd struct containing statistical info for VACUUM displays. */ IndexBulkDeleteResult * -gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, - IndexBulkDeleteCallback callback, void *callback_state) +gistbulkdelete(IndexVacuumInfo * info, IndexBulkDeleteResult * stats, IndexBulkDeleteCallback callback, void* callback_state) { Relation rel = info->index; GistBDItem *stack, *ptr; + BlockNumber recentParent = InvalidBlockNumber; + List *rescanList = NULL; + ListCell *cell; /* first time through? */ if (stats == NULL) @@ -234,9 +237,16 @@ gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, END_CRIT_SECTION(); } + if (ntodelete == maxoff && recentParent!=InvalidBlockNumber && + (rescanList == NULL || (BlockNumber)llast_int(rescanList) != recentParent)) + { + /* This page is a candidate to be deleted. Remember it's parent to rescan it later with xlock */ + rescanList = lappend_int(rescanList, recentParent); + } } else { + recentParent = stack->blkno; /* check for split proceeded after look at parent */ pushStackIfSplited(page, stack); @@ -271,5 +281,106 @@ gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, vacuum_delay_point(); } + /* rescan inner pages that had empty child pages */ + foreach(cell,rescanList) + { + Buffer buffer; + Page page; + OffsetNumber i, + maxoff; + IndexTuple idxtuple; + ItemId iid; + OffsetNumber todelete[MaxOffsetNumber]; + Buffer buftodelete[MaxOffsetNumber]; + int ntodelete = 0; + + buffer = ReadBufferExtended(rel, MAIN_FORKNUM, (BlockNumber)lfirst_int(cell), + RBM_NORMAL, info->strategy); + LockBuffer(buffer, GIST_EXCLUSIVE); + gistcheckpage(rel, buffer); + page = (Page) BufferGetPage(buffer); + + Assert(!GistPageIsLeaf(page)); + + maxoff = PageGetMaxOffsetNumber(page); + + for (i = OffsetNumberNext(FirstOffsetNumber); i <= maxoff; i = OffsetNumberNext(i)) + { + Buffer leafBuffer; + Page leafPage; + + iid = PageGetItemId(page, i); + idxtuple = (IndexTuple) PageGetItem(page, iid); + + leafBuffer = ReadBufferExtended(rel, MAIN_FORKNUM, ItemPointerGetBlockNumber(&(idxtuple->t_tid)), + RBM_NORMAL, info->strategy); + LockBuffer(leafBuffer, GIST_EXCLUSIVE); + gistcheckpage(rel, leafBuffer); + leafPage = (Page) BufferGetPage(leafBuffer); + Assert(GistPageIsLeaf(leafPage)); + + if (PageGetMaxOffsetNumber(leafPage) == InvalidOffsetNumber /* Nothing left to split */ + && !(GistFollowRight(leafPage) || GistPageGetNSN(page) < GistPageGetNSN(leafPage)) /* No follow-right */ + && ntodelete < maxoff-1) /* We must keep at least one leaf page per each */ + { + buftodelete[ntodelete] = leafBuffer; + todelete[ntodelete++] = i; + } + else + UnlockReleaseBuffer(leafBuffer); + } + + + if (ntodelete) + { + /* Drop references from internal page */ + TransactionId txid = GetCurrentTransactionId(); + START_CRIT_SECTION(); + + MarkBufferDirty(buffer); + PageIndexMultiDelete(page, todelete, ntodelete); + + if (RelationNeedsWAL(rel)) + { + XLogRecPtr recptr; + + recptr = gistXLogUpdate(buffer, todelete, ntodelete, NULL, 0, InvalidBuffer); + PageSetLSN(page, recptr); + } + else + PageSetLSN(page, gistGetFakeLSN(rel)); + + /* Mark pages as deleted */ + for (i = 0; i < ntodelete; i++) + { + Page leafPage = (Page)BufferGetPage(buftodelete[i]); + PageHeader header = (PageHeader)leafPage; + + header->pd_prune_xid = txid; + + GistPageSetDeleted(leafPage); + MarkBufferDirty(buftodelete[i]); + stats->pages_deleted++; + + if (RelationNeedsWAL(rel)) + { + XLogRecPtr recptr = gistXLogSetDeleted(rel->rd_node, buftodelete[i], header->pd_prune_xid); + PageSetLSN(leafPage, recptr); + } + else + PageSetLSN(leafPage, gistGetFakeLSN(rel)); + + UnlockReleaseBuffer(buftodelete[i]); + } + END_CRIT_SECTION(); + } + + UnlockReleaseBuffer(buffer); + + vacuum_delay_point(); + } + + list_free(rescanList); + return stats; -} +} \ No newline at end of file diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c index 1e09126978..7d515f3e7e 100644 --- a/src/backend/access/gist/gistxlog.c +++ b/src/backend/access/gist/gistxlog.c @@ -60,6 +60,29 @@ gistRedoClearFollowRight(XLogReaderState *record, uint8 block_id) UnlockReleaseBuffer(buffer); } +static void +gistRedoPageSetDeleted(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + gistxlogPageDelete *xldata = (gistxlogPageDelete *) XLogRecGetData(record); + Buffer buffer; + Page page; + PageHeader header; + + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) + { + page = (Page) BufferGetPage(buffer); + header = (PageHeader) page; + + header->pd_prune_xid = xldata->id; + GistPageSetDeleted(page); + + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + + UnlockReleaseBuffer(buffer); + } +} /* * redo any page update (except page split) */ @@ -112,8 +135,8 @@ gistRedoPageUpdateRecord(XLogReaderState *record) data += sizeof(OffsetNumber) * xldata->ntodelete; PageIndexMultiDelete(page, todelete, xldata->ntodelete); - if (GistPageIsLeaf(page)) - GistMarkTuplesDeleted(page); + + GistMarkTuplesDeleted(page); } /* Add new tuples if any */ @@ -324,6 +347,9 @@ gist_redo(XLogReaderState *record) case XLOG_GIST_CREATE_INDEX: gistRedoCreateIndex(record); break; + case XLOG_GIST_PAGE_DELETE: + gistRedoPageSetDeleted(record); + break; default: elog(PANIC, "gist_redo: unknown op code %u", info); } @@ -442,6 +468,23 @@ gistXLogSplit(bool page_is_leaf, return recptr; } + +XLogRecPtr +gistXLogSetDeleted(RelFileNode node, Buffer buffer, TransactionId xid) { + gistxlogPageDelete xlrec; + XLogRecPtr recptr; + + xlrec.id = xid; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageDelete)); + + XLogRegisterBuffer(0, buffer, REGBUF_STANDARD); + /* new tuples */ + recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_DELETE); + return recptr; +} + /* * Write XLOG record describing a page update. The update can include any * number of deletions and/or insertions of tuples on a single index page. diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index 36ed7244ba..fe4b87084e 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -16,6 +16,7 @@ #include "access/amapi.h" #include "access/gist.h" +#include "access/gistxlog.h" #include "access/itup.h" #include "fmgr.h" #include "lib/pairingheap.h" @@ -51,6 +52,11 @@ typedef struct char tupledata[FLEXIBLE_ARRAY_MEMBER]; } GISTNodeBufferPage; +typedef struct +{ + BlockNumber childblkno; /* hash key */ + BlockNumber parentblkno; +} ParentMapEntry; #define BUFFER_PAGE_DATA_OFFSET MAXALIGN(offsetof(GISTNodeBufferPage, tupledata)) /* Returns free space in node buffer page */ #define PAGE_FREE_SPACE(nbp) (nbp->freespace) @@ -176,13 +182,6 @@ typedef struct GISTScanOpaqueData typedef GISTScanOpaqueData *GISTScanOpaque; -/* despite the name, gistxlogPage is not part of any xlog record */ -typedef struct gistxlogPage -{ - BlockNumber blkno; - int num; /* number of index tuples following */ -} gistxlogPage; - /* SplitedPageLayout - gistSplit function result */ typedef struct SplitedPageLayout { @@ -409,6 +408,16 @@ extern bool gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate, extern SplitedPageLayout *gistSplit(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate); +/* gistxlog.c */ +extern void gist_redo(XLogReaderState *record); +extern void gist_desc(StringInfo buf, XLogReaderState *record); +extern const char *gist_identify(uint8 info); +extern void gist_xlog_startup(void); +extern void gist_xlog_cleanup(void); + +extern XLogRecPtr gistXLogSetDeleted(RelFileNode node, Buffer buffer, + TransactionId xid); + extern XLogRecPtr gistXLogUpdate(Buffer buffer, OffsetNumber *todelete, int ntodelete, IndexTuple *itup, int ntup, diff --git a/src/include/access/gistxlog.h b/src/include/access/gistxlog.h index 1a2b9496d0..8df7f4064d 100644 --- a/src/include/access/gistxlog.h +++ b/src/include/access/gistxlog.h @@ -17,12 +17,14 @@ #include "access/xlogreader.h" #include "lib/stringinfo.h" +/* XLog stuff */ + #define XLOG_GIST_PAGE_UPDATE 0x00 /* #define XLOG_GIST_NEW_ROOT 0x20 */ /* not used anymore */ #define XLOG_GIST_PAGE_SPLIT 0x30 /* #define XLOG_GIST_INSERT_COMPLETE 0x40 */ /* not used anymore */ #define XLOG_GIST_CREATE_INDEX 0x50 - /* #define XLOG_GIST_PAGE_DELETE 0x60 */ /* not used anymore */ +#define XLOG_GIST_PAGE_DELETE 0x60 /* * Backup Blk 0: updated page. @@ -59,6 +61,18 @@ typedef struct gistxlogPageSplit */ } gistxlogPageSplit; +typedef struct gistxlogPageDelete +{ + TransactionId id; +} gistxlogPageDelete; + +/* despite the name, gistxlogPage is not part of any xlog record */ +typedef struct gistxlogPage +{ + BlockNumber blkno; + int num; /* number of index tuples following */ +} gistxlogPage; + extern void gist_redo(XLogReaderState *record); extern void gist_desc(StringInfo buf, XLogReaderState *record); extern const char *gist_identify(uint8 info); diff --git a/src/test/regress/expected/gist.out b/src/test/regress/expected/gist.out index f5a2993aaf..5b92f08c74 100644 --- a/src/test/regress/expected/gist.out +++ b/src/test/regress/expected/gist.out @@ -27,9 +27,7 @@ insert into gist_point_tbl (id, p) select g+100000, point(g*10+1, g*10+1) from generate_series(1, 10000) g; -- To test vacuum, delete some entries from all over the index. delete from gist_point_tbl where id % 2 = 1; --- And also delete some concentration of values. (GiST doesn't currently --- attempt to delete pages even when they become empty, but if it did, this --- would exercise it) +-- And also delete some concentration of values. delete from gist_point_tbl where id < 10000; vacuum analyze gist_point_tbl; -- rebuild the index with a different fillfactor diff --git a/src/test/regress/sql/gist.sql b/src/test/regress/sql/gist.sql index bae722fe13..e66396e851 100644 --- a/src/test/regress/sql/gist.sql +++ b/src/test/regress/sql/gist.sql @@ -28,9 +28,7 @@ select g+100000, point(g*10+1, g*10+1) from generate_series(1, 10000) g; -- To test vacuum, delete some entries from all over the index. delete from gist_point_tbl where id % 2 = 1; --- And also delete some concentration of values. (GiST doesn't currently --- attempt to delete pages even when they become empty, but if it did, this --- would exercise it) +-- And also delete some concentration of values. delete from gist_point_tbl where id < 10000; vacuum analyze gist_point_tbl; -- 2.14.3 (Apple Git-98)