From d8737afb5d368015522b57f502bf1eced4220689 Mon Sep 17 00:00:00 2001 From: "dgrowley@gmail.com" Date: Thu, 22 Apr 2021 17:03:46 +1200 Subject: [PATCH v1] Use simplehash.h hashtables in SMgr The hash table lookups done in SMgr can quite often be a bottleneck during crash recovery. Traditionally these use dynahash. Here we swap dynahash out and use simplehash instead. This improves lookup performance. Some changes are required from simplehash.h here to make this work. The reason for this is that code external to smgr.c does point to the hashed SMgrRelation. Since simplehash does reallocate the bucket array when increasing the size of the table and also shuffle entries around during deletes, code pointing directly into hash entries would be a bad idea. To overcome this issue we only store a pointer to the SMgrRelationData in the hash table entry and maintain a separate allocation for that data. This does mean an additional pointer dereference during lookups, but only when the hash value matches, so the significant majority of the time that will only be done for the item we are actually looking for. Since the hash table key is stored in the referenced SMgrRelation, we need to add two new macros to allow simplehash to allocate the memory for the SMgrEntry during inserts before it tries to set the key. A new macro has also been added to allow simplehash implementations to perform cleanup when items are removed from the table. --- src/backend/storage/smgr/smgr.c | 173 +++++++++++++++++++++++++------- src/include/lib/simplehash.h | 48 ++++++++- 2 files changed, 182 insertions(+), 39 deletions(-) diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 4dc24649df..64a26e06c6 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -18,14 +18,50 @@ #include "postgres.h" #include "access/xlog.h" +#include "common/hashfn.h" #include "lib/ilist.h" #include "storage/bufmgr.h" #include "storage/ipc.h" #include "storage/md.h" #include "storage/smgr.h" -#include "utils/hsearch.h" #include "utils/inval.h" +/* Hash table entry type for SMgrRelationHash */ +typedef struct SMgrEntry +{ + int status; /* Hash table status */ + uint32 hash; /* Hash value (cached) */ + SMgrRelation data; /* Pointer to the SMgrRelationData */ +} SMgrEntry; + +static inline uint32 relfilenodebackend_hash(RelFileNodeBackend *rnode); +static void smgr_entry_cleanup(SMgrRelation reln); + +/* + * Because simplehash.h does not provide a stable pointer to hash table + * entries, we don't make the element type a SMgrRelation directly, instead we + * use an SMgrEntry type which has a pointer to the data field. simplehash can + * move entries around when adding or removing items from the hash table so + * having the SMgrRelation as a pointer inside the SMgrEntry allows external + * code to keep their own pointers to the SMgrRelation. Relcache does this. + * We use the SH_ENTRY_INITIALIZER to allocate memory for the SMgrRelationData + * when a new entry is created. We also define SH_ENTRY_CLEANUP to execute + * some cleanup when removing an item from the table. + */ +#define SH_PREFIX smgrtable +#define SH_ELEMENT_TYPE SMgrEntry +#define SH_KEY_TYPE RelFileNodeBackend +#define SH_KEY data->smgr_rnode +#define SH_HASH_KEY(tb, key) relfilenodebackend_hash(&key) +#define SH_EQUAL(tb, a, b) (memcmp(&a, &b, sizeof(RelFileNodeBackend)) == 0) +#define SH_SCOPE static inline +#define SH_STORE_HASH +#define SH_GET_HASH(tb, a) a->hash +#define SH_ENTRY_INITIALIZER(a) a->data = MemoryContextAlloc(TopMemoryContext, sizeof(SMgrRelationData)) +#define SH_ENTRY_CLEANUP(a) smgr_entry_cleanup(a->data) +#define SH_DEFINE +#define SH_DECLARE +#include "lib/simplehash.h" /* * This struct of function pointers defines the API between smgr.c and @@ -91,13 +127,62 @@ static const int NSmgr = lengthof(smgrsw); * Each backend has a hashtable that stores all extant SMgrRelation objects. * In addition, "unowned" SMgrRelation objects are chained together in a list. */ -static HTAB *SMgrRelationHash = NULL; +static smgrtable_hash *SMgrRelationHash = NULL; static dlist_head unowned_relns; /* local function prototypes */ static void smgrshutdown(int code, Datum arg); +/* + * relfilenodebackend_hash + * Custom rolled hash function for simplehash table. + * + * smgropen() is often a bottleneck in CPU bound workloads during crash + * recovery. We make use of this custom hash function rather than using + * hash_bytes as it gives us a little bit more performance. + * + * XXX What if sizeof(Oid) is not 4? + */ +static inline uint32 +relfilenodebackend_hash(RelFileNodeBackend *rnode) +{ + uint32 hashkey; + + hashkey = murmurhash32((uint32) rnode->node.spcNode); + + /* rotate hashkey left 1 bit at each step */ + hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); + hashkey ^= murmurhash32((uint32) rnode->node.dbNode); + + hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); + hashkey ^= murmurhash32((uint32) rnode->node.relNode); + + hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0); + hashkey ^= murmurhash32((uint32) rnode->backend); + + return hashkey; +} + +/* + * smgr_entry_cleanup + * Cleanup code for simplehash.h to execute when removing an item from + * the hash table. + */ +static void +smgr_entry_cleanup(SMgrRelation reln) +{ + /* + * Unhook the owner pointer, if any. We only do this when we're certain + * the entry is removed from the hash table. This allows us to leave the + * owner attached if the hash table delete were to fail for some reason. + */ + if (reln->smgr_owner) + *reln->smgr_owner = NULL; + + pfree(reln); +} + /* * smgrinit(), smgrshutdown() -- Initialize or shut down storage @@ -147,31 +232,26 @@ smgropen(RelFileNode rnode, BackendId backend) { RelFileNodeBackend brnode; SMgrRelation reln; + SMgrEntry *entry; bool found; - if (SMgrRelationHash == NULL) + if (unlikely(SMgrRelationHash == NULL)) { /* First time through: initialize the hash table */ - HASHCTL ctl; - - ctl.keysize = sizeof(RelFileNodeBackend); - ctl.entrysize = sizeof(SMgrRelationData); - SMgrRelationHash = hash_create("smgr relation table", 400, - &ctl, HASH_ELEM | HASH_BLOBS); + SMgrRelationHash = smgrtable_create(TopMemoryContext, 400, NULL); dlist_init(&unowned_relns); } /* Look up or create an entry */ brnode.node = rnode; brnode.backend = backend; - reln = (SMgrRelation) hash_search(SMgrRelationHash, - (void *) &brnode, - HASH_ENTER, &found); + entry = smgrtable_insert(SMgrRelationHash, brnode, &found); + reln = entry->data; /* Initialize it if not present before */ if (!found) { - /* hash_search already filled in the lookup key */ + /* smgrtable_insert already filled in the lookup key */ reln->smgr_owner = NULL; reln->smgr_targblock = InvalidBlockNumber; for (int i = 0; i <= MAX_FORKNUM; ++i) @@ -250,10 +330,11 @@ smgrexists(SMgrRelation reln, ForkNumber forknum) } /* - * smgrclose() -- Close and delete an SMgrRelation object. + * smgrclose() -- Close and delete an SMgrRelation object but don't + * remove from the SMgrRelationHash table. */ -void -smgrclose(SMgrRelation reln) +static inline void +smgrclose_internal(SMgrRelation reln) { SMgrRelation *owner; ForkNumber forknum; @@ -266,17 +347,18 @@ smgrclose(SMgrRelation reln) if (!owner) dlist_delete(&reln->node); - if (hash_search(SMgrRelationHash, - (void *) &(reln->smgr_rnode), - HASH_REMOVE, NULL) == NULL) - elog(ERROR, "SMgrRelation hashtable corrupted"); +} - /* - * Unhook the owner pointer, if any. We do this last since in the remote - * possibility of failure above, the SMgrRelation object will still exist. - */ - if (owner) - *owner = NULL; +/* + * smgrclose() -- Close and delete an SMgrRelation object. + */ +void +smgrclose(SMgrRelation reln) +{ + smgrclose_internal(reln); + + if (!smgrtable_delete(SMgrRelationHash, reln->smgr_rnode)) + elog(ERROR, "SMgrRelation hashtable corrupted"); } /* @@ -285,17 +367,25 @@ smgrclose(SMgrRelation reln) void smgrcloseall(void) { - HASH_SEQ_STATUS status; - SMgrRelation reln; + smgrtable_iterator iterator; + SMgrEntry *entry; /* Nothing to do if hashtable not set up */ - if (SMgrRelationHash == NULL) + if (unlikely(SMgrRelationHash == NULL)) return; - hash_seq_init(&status, SMgrRelationHash); + smgrtable_start_iterate(SMgrRelationHash, &iterator); + while ((entry = smgrtable_iterate(SMgrRelationHash, &iterator)) != NULL) + smgrclose_internal(entry->data); - while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL) - smgrclose(reln); + /* + * Finally, remove all entries from the hash table. This is done last and + * in a single operation as we're unable to remove multiple entries in the + * above loop due to deletes moving elements around in the table. + * Additionally, it is much more efficient to just wipe out all entries + * rather than doing individual deletes of each entry. + */ + smgrtable_truncate(SMgrRelationHash); } /* @@ -309,17 +399,24 @@ smgrcloseall(void) void smgrclosenode(RelFileNodeBackend rnode) { - SMgrRelation reln; + SMgrEntry *entry; /* Nothing to do if hashtable not set up */ if (SMgrRelationHash == NULL) return; + entry = smgrtable_lookup(SMgrRelationHash, rnode); + if (entry != NULL) + { + /* Delete the entry, but skip the hash table delete... */ + smgrclose_internal(entry->data); - reln = (SMgrRelation) hash_search(SMgrRelationHash, - (void *) &rnode, - HASH_FIND, NULL); - if (reln != NULL) - smgrclose(reln); + /* + * ... as we can remove from the hash table directly due to already + * having a pointer to the exact entry we want to delete. This saves + * an additional table lookup. + */ + smgrtable_delete_item(SMgrRelationHash, entry); + } } /* diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h index da51781e98..569104def0 100644 --- a/src/include/lib/simplehash.h +++ b/src/include/lib/simplehash.h @@ -50,6 +50,10 @@ * - SH_HASH_KEY(table, key) - generate hash for the key * - SH_STORE_HASH - if defined the hash is stored in the elements * - SH_GET_HASH(tb, a) - return the field to store the hash in + * - SH_ENTRY_INITIALIZER(a) - if defined, the code in this macro is called + * for new entries + * - SH_ENTRY_CLEANUP(a) - if defined, the code in this macro is called + * when an entry is removed from the hash table. * * The element type is required to contain a "status" member that can store * the range of values defined in the SH_STATUS enum. @@ -115,6 +119,7 @@ #define SH_LOOKUP SH_MAKE_NAME(lookup) #define SH_LOOKUP_HASH SH_MAKE_NAME(lookup_hash) #define SH_GROW SH_MAKE_NAME(grow) +#define SH_TRUNCATE SH_MAKE_NAME(truncate) #define SH_START_ITERATE SH_MAKE_NAME(start_iterate) #define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at) #define SH_ITERATE SH_MAKE_NAME(iterate) @@ -224,6 +229,9 @@ SH_SCOPE void SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry); /* bool _delete(_hash *tb, key) */ SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key); +/* void _truncate(_hash *tb) */ +SH_SCOPE void SH_TRUNCATE(SH_TYPE * tb); + /* void _start_iterate(_hash *tb, _iterator *iter) */ SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter); @@ -634,6 +642,9 @@ restart: if (entry->status == SH_STATUS_EMPTY) { tb->members++; +#ifdef SH_ENTRY_INITIALIZER + SH_ENTRY_INITIALIZER(entry); +#endif entry->SH_KEY = key; #ifdef SH_STORE_HASH SH_GET_HASH(tb, entry) = hash; @@ -721,6 +732,9 @@ restart: /* and fill the now empty spot */ tb->members++; +#ifdef SH_ENTRY_INITIALIZER + SH_ENTRY_INITIALIZER(entry); +#endif entry->SH_KEY = key; #ifdef SH_STORE_HASH @@ -856,7 +870,9 @@ SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key) SH_ELEMENT_TYPE *lastentry = entry; tb->members--; - +#ifdef SH_ENTRY_CLEANUP + SH_ENTRY_CLEANUP(entry); +#endif /* * Backward shift following elements till either an empty element * or an element at its optimal position is encountered. @@ -919,6 +935,9 @@ SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry) curelem = entry - &tb->data[0]; tb->members--; +#ifdef SH_ENTRY_CLEANUP + SH_ENTRY_CLEANUP(entry); +#endif /* * Backward shift following elements till either an empty element or an @@ -959,6 +978,30 @@ SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry) } } +/* + * Remove all entries from the table making the table empty. + */ +SH_SCOPE void +SH_TRUNCATE(SH_TYPE * tb) +{ + int i; + + for (i = 0; i < tb->size; i++) + { + SH_ELEMENT_TYPE *entry = &tb->data[i]; + if (entry->status != SH_STATUS_EMPTY) + { + entry->status = SH_STATUS_EMPTY; + +#ifdef SH_ENTRY_CLEANUP + SH_ENTRY_CLEANUP(entry); +#endif + } + } + + tb->members = 0; +} + /* * Initialize iterator. */ @@ -1133,6 +1176,8 @@ SH_STAT(SH_TYPE * tb) #undef SH_DECLARE #undef SH_DEFINE #undef SH_GET_HASH +#undef SH_ENTRY_INITIALIZER +#undef SH_ENTRY_CLEANUP #undef SH_STORE_HASH #undef SH_USE_NONDEFAULT_ALLOCATOR #undef SH_EQUAL @@ -1166,6 +1211,7 @@ SH_STAT(SH_TYPE * tb) #undef SH_LOOKUP #undef SH_LOOKUP_HASH #undef SH_GROW +#undef SH_TRUNCATE #undef SH_START_ITERATE #undef SH_START_ITERATE_AT #undef SH_ITERATE -- 2.27.0