From fdaa0873a37545cf42a90b9ede562bcbc2d72947 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Thu, 29 Jun 2023 12:27:52 +1200 Subject: [PATCH 2/2] Use ReadRecentBuffer() for btree root page. The root page of a btree is accessed on every index scan, so it gets very hot. We can measure a speed-up on many workloads by pinning it with ReadRecentBuffer() instead of ReadBuffer(), after remembering where it was last time in the AM-private cache space in rel->rd_amcache. Rearrange the existing use of rd_amcache into a new struct BTAMCacheData. It's likely that we'll find more things to put in there in future work. Discussion: https://postgr.es/m/20230627020546.t6z4tntmj7wmjrfh%40awork3.anarazel.de --- src/backend/access/nbtree/nbtpage.c | 93 +++++++++++++++++++++-------- src/include/access/nbtree.h | 10 ++++ src/tools/pgindent/typedefs.list | 1 + 3 files changed, 80 insertions(+), 24 deletions(-) diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index d78971bfe8..bf270874d2 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -311,6 +311,29 @@ _bt_set_cleanup_info(Relation rel, BlockNumber num_delpages) _bt_relbuf(rel, metabuf); } +/* + * Get our private per-relation cache area. + */ +static inline BTAMCacheData * +_bt_getcache(Relation rel) +{ + BTAMCacheData *amcache; + + if (unlikely(rel->rd_amcache == NULL)) + { + /* Set up cache on first time through. */ + amcache = (BTAMCacheData *) + MemoryContextAlloc(rel->rd_indexcxt, sizeof(*amcache)); + amcache->meta_page_is_valid = false; + amcache->recent_root_buffer = InvalidBuffer; + rel->rd_amcache = amcache; + } + else + amcache = (BTAMCacheData *) rel->rd_amcache; + + return amcache; +} + /* * _bt_getroot() -- Get the root page of the btree. 
* @@ -350,17 +373,21 @@ _bt_getroot(Relation rel, Relation heaprel, int access) BlockNumber rootblkno; uint32 rootlevel; BTMetaPageData *metad; + BTAMCacheData *amcache; Assert(access == BT_READ || heaprel != NULL); + amcache = _bt_getcache(rel); + /* * Try to use previously-cached metapage data to find the root. This * normally saves one buffer access per index search, which is a very * helpful savings in bufmgr traffic and hence contention. */ - if (rel->rd_amcache != NULL) + if (amcache->meta_page_is_valid) { - metad = (BTMetaPageData *) rel->rd_amcache; + metad = &amcache->meta_page; + /* We shouldn't have cached it if any of these fail */ Assert(metad->btm_magic == BTREE_MAGIC); Assert(metad->btm_version >= BTREE_MIN_VERSION); @@ -373,7 +400,25 @@ _bt_getroot(Relation rel, Relation heaprel, int access) Assert(rootblkno != P_NONE); rootlevel = metad->btm_fastlevel; - rootbuf = _bt_getbuf(rel, rootblkno, BT_READ); + /* Try to find the root page in the buffer it was last seen in. */ + if (BufferIsValid(amcache->recent_root_buffer) && + ReadRecentBuffer(rel->rd_locator, MAIN_FORKNUM, rootblkno, + amcache->recent_root_buffer)) + { + /* + * It's in the same buffer as last time, and we avoided a trip + * through the buffer map. + */ + rootbuf = amcache->recent_root_buffer; + _bt_lockbuf(rel, rootbuf, BT_READ); + _bt_checkpage(rel, rootbuf); + } + else + { + /* Slow path. Remember where it is for next time. */ + rootbuf = _bt_getbuf(rel, rootblkno, BT_READ); + amcache->recent_root_buffer = rootbuf; + } rootpage = BufferGetPage(rootbuf); rootopaque = BTPageGetOpaque(rootpage); @@ -393,10 +438,8 @@ _bt_getroot(Relation rel, Relation heaprel, int access) return rootbuf; } _bt_relbuf(rel, rootbuf); - /* Cache is stale, throw it away */ - if (rel->rd_amcache) - pfree(rel->rd_amcache); - rel->rd_amcache = NULL; + /* Cache is stale, mark it invalid. 
*/ + amcache->meta_page_is_valid = false; } metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); @@ -523,9 +566,8 @@ _bt_getroot(Relation rel, Relation heaprel, int access) /* * Cache the metapage data for next time */ - rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt, - sizeof(BTMetaPageData)); - memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData)); + amcache->meta_page = *metad; + amcache->meta_page_is_valid = true; /* * We are done with the metapage; arrange to release it via first @@ -588,16 +630,16 @@ _bt_gettrueroot(Relation rel) BlockNumber rootblkno; uint32 rootlevel; BTMetaPageData *metad; + BTAMCacheData *amcache; /* * We don't try to use cached metapage data here, since (a) this path is * not performance-critical, and (b) if we are here it suggests our cache * is out-of-date anyway. In light of point (b), it's probably safest to - * actively flush any cached metapage info. + * actively invalidate any cached metapage info. */ - if (rel->rd_amcache) - pfree(rel->rd_amcache); - rel->rd_amcache = NULL; + amcache = _bt_getcache(rel); + amcache->meta_page_is_valid = false; metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); metapg = BufferGetPage(metabuf); @@ -674,9 +716,12 @@ _bt_gettrueroot(Relation rel) int _bt_getrootheight(Relation rel) { + BTAMCacheData *amcache; BTMetaPageData *metad; - if (rel->rd_amcache == NULL) + amcache = _bt_getcache(rel); + + if (!amcache->meta_page_is_valid) { Buffer metabuf; @@ -697,14 +742,13 @@ _bt_getrootheight(Relation rel) /* * Cache the metapage data for next time */ - rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt, - sizeof(BTMetaPageData)); - memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData)); + amcache->meta_page = *metad; + amcache->meta_page_is_valid = true; _bt_relbuf(rel, metabuf); } /* Get cached page */ - metad = (BTMetaPageData *) rel->rd_amcache; + metad = &amcache->meta_page; /* We shouldn't have cached it if any of these fail */ Assert(metad->btm_magic == BTREE_MAGIC); 
Assert(metad->btm_version >= BTREE_MIN_VERSION); @@ -738,9 +782,11 @@ void _bt_metaversion(Relation rel, bool *heapkeyspace, bool *allequalimage) { + BTAMCacheData *amcache; BTMetaPageData *metad; - if (rel->rd_amcache == NULL) + amcache = _bt_getcache(rel); + if (!amcache->meta_page_is_valid) { Buffer metabuf; @@ -770,14 +816,13 @@ _bt_metaversion(Relation rel, bool *heapkeyspace, bool *allequalimage) * from version 2 to version 3, both of which are !heapkeyspace * versions. */ - rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt, - sizeof(BTMetaPageData)); - memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData)); + amcache->meta_page = *metad; + amcache->meta_page_is_valid = true; _bt_relbuf(rel, metabuf); } /* Get cached page */ - metad = (BTMetaPageData *) rel->rd_amcache; + metad = &amcache->meta_page; /* We shouldn't have cached it if any of these fail */ Assert(metad->btm_magic == BTREE_MAGIC); Assert(metad->btm_version >= BTREE_MIN_VERSION); diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 8891fa7973..85cab606a3 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -151,6 +151,16 @@ typedef struct BTMetaPageData #define BTREE_MIN_VERSION 2 /* minimum supported version */ #define BTREE_NOVAC_VERSION 3 /* version with all meta fields set */ +/* + * Cache space, stored in rel->rd_amcache. + */ +typedef struct BTAMCacheData +{ + BTMetaPageData meta_page; + bool meta_page_is_valid; + Buffer recent_root_buffer; +} BTAMCacheData; + /* * Maximum size of a btree index entry, including its tuple header. * diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 260854747b..b75d9a5cb2 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -187,6 +187,7 @@ BOOL BOOLEAN BOX +BTAMCacheData BTArrayKeyInfo BTBuildState BTCycleId BTDedupInterval -- 2.40.1